Merge "Suggest words with excessive chars out of proximity chars Bug: 3273807"

This commit is contained in:
satok 2010-12-13 00:46:17 -08:00 committed by Android (Google) Code Review
commit 4e8dc88407
3 changed files with 61 additions and 46 deletions

View file

@ -67,6 +67,7 @@
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60 #define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java // This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
@ -75,7 +76,10 @@
#define MAX_DEPTH_MULTIPLIER 3 #define MAX_DEPTH_MULTIPLIER 3
#define MIN_SUGGEST_DEPTH 2 // Minimum suggest depth for one word for all cases except for missing space suggestions.
#define MIN_SUGGEST_DEPTH 1
#define MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION 3
#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3
#define min(a,b) ((a)<(b)?(a):(b)) #define min(a,b) ((a)<(b)?(a):(b))

View file

@ -45,24 +45,25 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
int *frequencies, int *nextLetters, int nextLettersSize) int *frequencies, int *nextLetters, int nextLettersSize)
{ {
initSuggestions(codes, codesSize, outWords, frequencies); initSuggestions(codes, codesSize, outWords, frequencies);
if (DEBUG_DICT) assert(codesSize == mInputLength);
const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH); const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
getSuggestionCandidates(codesSize, -1, -1, -1, nextLetters, nextLettersSize, MAX_DEPTH); getSuggestionCandidates(-1, -1, -1, nextLetters, nextLettersSize, MAX_DEPTH);
// Suggestion with missing character // Suggestion with missing character
if (SUGGEST_WORDS_WITH_MISSING_CHARACTER) { if (SUGGEST_WORDS_WITH_MISSING_CHARACTER) {
for (int i = 0; i < codesSize; ++i) { for (int i = 0; i < codesSize; ++i) {
if (DEBUG_DICT) LOGI("--- Suggest missing characters %d", i); if (DEBUG_DICT) LOGI("--- Suggest missing characters %d", i);
getSuggestionCandidates(codesSize, i, -1, -1, NULL, 0, MAX_DEPTH); getSuggestionCandidates(i, -1, -1, NULL, 0, MAX_DEPTH);
} }
} }
// Suggestion with excessive character // Suggestion with excessive character
if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER && mInputLength > MIN_SUGGEST_DEPTH) { if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER
&& mInputLength >= MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION) {
for (int i = 0; i < codesSize; ++i) { for (int i = 0; i < codesSize; ++i) {
if (existsAdjacentProximityChars(i, codesSize)) { if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i);
if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i); getSuggestionCandidates(-1, i, -1, NULL, 0, MAX_DEPTH);
getSuggestionCandidates(codesSize, -1, i, -1, NULL, 0, MAX_DEPTH);
}
} }
} }
@ -71,12 +72,13 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
if (SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS) { if (SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS) {
for (int i = 0; i < codesSize; ++i) { for (int i = 0; i < codesSize; ++i) {
if (DEBUG_DICT) LOGI("--- Suggest transposed characters %d", i); if (DEBUG_DICT) LOGI("--- Suggest transposed characters %d", i);
getSuggestionCandidates(codesSize, -1, -1, i, NULL, 0, mInputLength - 1); getSuggestionCandidates(-1, -1, i, NULL, 0, mInputLength - 1);
} }
} }
// Suggestions with missing space // Suggestions with missing space
if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER && mInputLength > MIN_SUGGEST_DEPTH) { if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER
&& mInputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
for (int i = 1; i < codesSize; ++i) { for (int i = 1; i < codesSize; ++i) {
if (DEBUG_DICT) LOGI("--- Suggest missing space characters %d", i); if (DEBUG_DICT) LOGI("--- Suggest missing space characters %d", i);
getMissingSpaceWords(mInputLength, i); getMissingSpaceWords(mInputLength, i);
@ -196,13 +198,15 @@ bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
static const char QUOTE = '\''; static const char QUOTE = '\'';
static const char SPACE = ' '; static const char SPACE = ' ';
void UnigramDictionary::getSuggestionCandidates(const int inputLength, const int skipPos, void UnigramDictionary::getSuggestionCandidates(const int skipPos,
const int excessivePos, const int transposedPos, int *nextLetters, const int excessivePos, const int transposedPos, int *nextLetters,
const int nextLettersSize, const int maxDepth) { const int nextLettersSize, const int maxDepth) {
if (DEBUG_DICT) LOGI("getSuggestionCandidates %d", maxDepth); if (DEBUG_DICT) {
if (DEBUG_DICT) assert(transposedPos + 1 < inputLength); LOGI("getSuggestionCandidates %d", maxDepth);
if (DEBUG_DICT) assert(excessivePos < inputLength); assert(transposedPos + 1 < mInputLength);
if (DEBUG_DICT) assert(missingPos < inputLength); assert(excessivePos < mInputLength);
assert(missingPos < mInputLength);
}
int rootPosition = ROOT_POS; int rootPosition = ROOT_POS;
// Get the number of child of root, then increment the position // Get the number of child of root, then increment the position
int childCount = Dictionary::getCount(DICT, &rootPosition); int childCount = Dictionary::getCount(DICT, &rootPosition);
@ -321,41 +325,46 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
} }
} }
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength( inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int snr,
unsigned short *word, const int inputLength, const int depth, const int snr, const int skipPos, const int excessivePos, const int transposedPos, const int freq,
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos, const bool sameLength) {
const int transposedPos, const int freq) {
int finalFreq = freq * snr;
// TODO: Demote by edit distance // TODO: Demote by edit distance
int finalFreq = freq * snr;
if (skipPos >= 0) finalFreq = finalFreq * WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE / 100; if (skipPos >= 0) finalFreq = finalFreq * WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE / 100;
if (excessivePos >= 0) finalFreq = finalFreq
* WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE / 100;
if (transposedPos >= 0) finalFreq = finalFreq if (transposedPos >= 0) finalFreq = finalFreq
* WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE / 100; * WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE / 100;
if (excessivePos >= 0) {
finalFreq = finalFreq * WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE / 100;
if (!existsAdjacentProximityChars(inputIndex, mInputLength)) {
finalFreq = finalFreq
* WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE / 100;
}
}
if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
return finalFreq;
}
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
unsigned short *word, const int inputIndex, const int depth, const int snr,
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
const int transposedPos, const int freq) {
const int finalFreq = calculateFinalFreq(inputIndex, snr, skipPos, excessivePos, transposedPos,
freq, false);
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq); if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
if (depth >= inputLength && skipPos < 0) { if (depth >= mInputLength && skipPos < 0) {
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize); registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
} }
} }
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength( inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
unsigned short *word, const int depth, const int snr, const int skipPos, unsigned short *word, const int inputIndex, const int depth, const int snr,
const int excessivePos, const int transposedPos, const int freq, const int addedWeight) { const int skipPos, const int excessivePos, const int transposedPos, const int freq,
if (!sameAsTyped(word, depth + 1)) { const int addedWeight) {
int finalFreq = freq * snr * addedWeight; if (sameAsTyped(word, depth + 1)) return;
// TODO: Demote by edit distance const int finalFreq = calculateFinalFreq(inputIndex, snr * addedWeight, skipPos,
if (skipPos >= 0) finalFreq = finalFreq * WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE / 100; excessivePos, transposedPos, freq, true);
if (excessivePos >= 0) finalFreq = finalFreq // Proximity collection will promote a word of the same length as what user typed.
* WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE / 100; if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
if (transposedPos >= 0) finalFreq = finalFreq
* WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE / 100;
// Proximity collection will promote a word of the same length as
// what user typed.
if (skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
}
} }
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c, inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
@ -437,7 +446,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) { if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
mWord[depth] = c; mWord[depth] = c;
if (traverseAllNodes && terminal) { if (traverseAllNodes && terminal) {
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth, onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, inputIndex, depth,
snr, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos, freq); snr, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos, freq);
} }
if (!needsToTraverseChildrenNodes) return false; if (!needsToTraverseChildrenNodes) return false;
@ -462,7 +471,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1; const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1;
const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1; const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
if (isSameAsUserTypedLength && terminal) { if (isSameAsUserTypedLength && terminal) {
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr, onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, snr,
skipPos, excessivePos, transposedPos, freq, addedWeight); skipPos, excessivePos, transposedPos, freq, addedWeight);
} }
if (!needsToTraverseChildrenNodes) return false; if (!needsToTraverseChildrenNodes) return false;

View file

@ -31,7 +31,7 @@ public:
private: private:
void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies); void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
void getSuggestionCandidates(const int inputLength, const int skipPos, const int excessivePos, void getSuggestionCandidates(const int skipPos, const int excessivePos,
const int transposedPos, int *nextLetters, const int nextLettersSize, const int transposedPos, int *nextLetters, const int nextLettersSize,
const int maxDepth); const int maxDepth);
void getVersionNumber(); void getVersionNumber();
@ -52,13 +52,15 @@ private:
const int excessivePos, const int transposedPos, int *nextLetters, const int excessivePos, const int transposedPos, int *nextLetters,
const int nextLettersSize); const int nextLettersSize);
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos,
const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word, void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
const int mInputLength, const int depth, const int snr, int *nextLetters, const int inputIndex, const int depth, const int snr, int *nextLetters,
const int nextLettersSize, const int skipPos, const int excessivePos, const int nextLettersSize, const int skipPos, const int excessivePos,
const int transposedPos, const int freq); const int transposedPos, const int freq);
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth, void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
const int snr, const int skipPos, const int excessivePos, const int transposedPos, const int inputIndex, const int depth, const int snr, const int skipPos,
const int freq, const int addedWeight); const int excessivePos, const int transposedPos, const int freq, const int addedWeight);
bool needsToSkipCurrentNode(const unsigned short c, bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth); const int inputIndex, const int skipPos, const int depth);
int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos, int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos,