Merge "Suggest words with excessive chars out of proximity chars Bug: 3273807"

This commit is contained in:
satok 2010-12-13 00:46:17 -08:00 committed by Android (Google) Code Review
commit 4e8dc88407
3 changed files with 61 additions and 46 deletions

View file

@ -67,6 +67,7 @@
// Demotion rates for suggestions that required a correction. Where this view
// shows them in use (UnigramDictionary::calculateFinalFreq), each rate is applied
// as `finalFreq * RATE / 100`, i.e. as a percentage of the candidate's score.
// Applied when skipPos >= 0.
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75
// NOTE(review): no use of this rate is visible in this view; presumably applied
// to missing-space suggestions -- confirm against getMissingSpaceWords.
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
// Applied when excessivePos >= 0.
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
// Applied in addition to the excessive-character rate when
// existsAdjacentProximityChars(inputIndex, mInputLength) returns false.
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
// Applied when transposedPos >= 0.
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
@ -75,7 +76,10 @@
// Multiplied by the input length to bound the traversal depth; getSuggestions
// caps the product at MAX_WORD_LENGTH.
#define MAX_DEPTH_MULTIPLIER 3
// NOTE(review): MIN_SUGGEST_DEPTH is defined twice in this view (2 here, 1 below).
// This looks like the removed and added lines of a diff rendered together; a real
// translation unit containing both would redefine the macro with a different
// value -- confirm which definition is live.
#define MIN_SUGGEST_DEPTH 2
// Minimum suggest depth for one word for all cases except for missing space suggestions.
#define MIN_SUGGEST_DEPTH 1
// getSuggestions runs the missing-space pass only when
// mInputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION.
#define MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION 3
// getSuggestions runs the excessive-character pass only when
// mInputLength >= MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION.
#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3
// Function-like macro: the selected argument is evaluated twice, so do not pass
// expressions with side effects.
#define min(a,b) ((a)<(b)?(a):(b))

View file

@ -45,24 +45,25 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
int *frequencies, int *nextLetters, int nextLettersSize)
{
initSuggestions(codes, codesSize, outWords, frequencies);
if (DEBUG_DICT) assert(codesSize == mInputLength);
const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
getSuggestionCandidates(codesSize, -1, -1, -1, nextLetters, nextLettersSize, MAX_DEPTH);
getSuggestionCandidates(-1, -1, -1, nextLetters, nextLettersSize, MAX_DEPTH);
// Suggestion with missing character
if (SUGGEST_WORDS_WITH_MISSING_CHARACTER) {
for (int i = 0; i < codesSize; ++i) {
if (DEBUG_DICT) LOGI("--- Suggest missing characters %d", i);
getSuggestionCandidates(codesSize, i, -1, -1, NULL, 0, MAX_DEPTH);
getSuggestionCandidates(i, -1, -1, NULL, 0, MAX_DEPTH);
}
}
// Suggestion with excessive character
if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER && mInputLength > MIN_SUGGEST_DEPTH) {
if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER
&& mInputLength >= MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION) {
for (int i = 0; i < codesSize; ++i) {
if (existsAdjacentProximityChars(i, codesSize)) {
if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i);
getSuggestionCandidates(codesSize, -1, i, -1, NULL, 0, MAX_DEPTH);
}
getSuggestionCandidates(-1, i, -1, NULL, 0, MAX_DEPTH);
}
}
@ -71,12 +72,13 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
if (SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS) {
for (int i = 0; i < codesSize; ++i) {
if (DEBUG_DICT) LOGI("--- Suggest transposed characters %d", i);
getSuggestionCandidates(codesSize, -1, -1, i, NULL, 0, mInputLength - 1);
getSuggestionCandidates(-1, -1, i, NULL, 0, mInputLength - 1);
}
}
// Suggestions with missing space
if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER && mInputLength > MIN_SUGGEST_DEPTH) {
if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER
&& mInputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
for (int i = 1; i < codesSize; ++i) {
if (DEBUG_DICT) LOGI("--- Suggest missing space characters %d", i);
getMissingSpaceWords(mInputLength, i);
@ -196,13 +198,15 @@ bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
static const char QUOTE = '\'';
static const char SPACE = ' ';
void UnigramDictionary::getSuggestionCandidates(const int inputLength, const int skipPos,
void UnigramDictionary::getSuggestionCandidates(const int skipPos,
const int excessivePos, const int transposedPos, int *nextLetters,
const int nextLettersSize, const int maxDepth) {
if (DEBUG_DICT) LOGI("getSuggestionCandidates %d", maxDepth);
if (DEBUG_DICT) assert(transposedPos + 1 < inputLength);
if (DEBUG_DICT) assert(excessivePos < inputLength);
if (DEBUG_DICT) assert(missingPos < inputLength);
if (DEBUG_DICT) {
LOGI("getSuggestionCandidates %d", maxDepth);
assert(transposedPos + 1 < mInputLength);
assert(excessivePos < mInputLength);
assert(missingPos < mInputLength);
}
int rootPosition = ROOT_POS;
// Get the number of children of the root, then increment the position
int childCount = Dictionary::getCount(DICT, &rootPosition);
@ -321,42 +325,47 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
}
}
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
unsigned short *word, const int inputLength, const int depth, const int snr,
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
const int transposedPos, const int freq) {
int finalFreq = freq * snr;
// Computes the final score of a candidate word.
//
// Starts from freq * snr, then scales the score down by the demotion rate of
// every correction that is in effect, and finally multiplies by
// FULL_WORD_MULTIPLIER when sameLength is true and no character was skipped.
//
// inputIndex     index passed to existsAdjacentProximityChars() for the
//                out-of-proximity check
// snr            multiplier applied to freq (one caller passes snr * addedWeight)
// skipPos        >= 0 when a missing-character correction is in effect
// excessivePos   >= 0 when an excessive-character correction is in effect
// transposedPos  >= 0 when a transposed-characters correction is in effect
// freq           base frequency of the word
// sameLength     true when the caller handles a word as long as the typed input
//
// Returns the adjusted score. All arithmetic is on int: each rate is applied as
// multiply-then-divide-by-100, so the division truncates.
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int snr,
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
const bool sameLength) {
// TODO: Demote by edit distance
int finalFreq = freq * snr;
if (skipPos >= 0) finalFreq = finalFreq * WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE / 100;
// NOTE(review): the excessive-character demotion appears twice in this view
// (this single-statement form and the block form below). This looks like the
// removed and added lines of a diff rendered together; as written the rate
// would be applied twice -- confirm against the actual file.
if (excessivePos >= 0) finalFreq = finalFreq
* WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE / 100;
if (transposedPos >= 0) finalFreq = finalFreq
* WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE / 100;
if (excessivePos >= 0) {
finalFreq = finalFreq * WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE / 100;
// Extra demotion when existsAdjacentProximityChars() reports false for this index.
if (!existsAdjacentProximityChars(inputIndex, mInputLength)) {
finalFreq = finalFreq
* WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE / 100;
}
}
// Promote same-length words, but only when no character was skipped.
if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
return finalFreq;
}
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
unsigned short *word, const int inputIndex, const int depth, const int snr,
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
const int transposedPos, const int freq) {
const int finalFreq = calculateFinalFreq(inputIndex, snr, skipPos, excessivePos, transposedPos,
freq, false);
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
if (depth >= inputLength && skipPos < 0) {
if (depth >= mInputLength && skipPos < 0) {
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
}
}
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
unsigned short *word, const int depth, const int snr, const int skipPos,
const int excessivePos, const int transposedPos, const int freq, const int addedWeight) {
if (!sameAsTyped(word, depth + 1)) {
int finalFreq = freq * snr * addedWeight;
// TODO: Demote by edit distance
if (skipPos >= 0) finalFreq = finalFreq * WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE / 100;
if (excessivePos >= 0) finalFreq = finalFreq
* WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE / 100;
if (transposedPos >= 0) finalFreq = finalFreq
* WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE / 100;
// Proximity collection will promote a word of the same length as
// what user typed.
if (skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
unsigned short *word, const int inputIndex, const int depth, const int snr,
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
const int addedWeight) {
if (sameAsTyped(word, depth + 1)) return;
const int finalFreq = calculateFinalFreq(inputIndex, snr * addedWeight, skipPos,
excessivePos, transposedPos, freq, true);
// Proximity collection will promote a word of the same length as what user typed.
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
}
}
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth) {
@ -437,7 +446,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
mWord[depth] = c;
if (traverseAllNodes && terminal) {
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, inputIndex, depth,
snr, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos, freq);
}
if (!needsToTraverseChildrenNodes) return false;
@ -462,7 +471,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1;
const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
if (isSameAsUserTypedLength && terminal) {
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, snr,
skipPos, excessivePos, transposedPos, freq, addedWeight);
}
if (!needsToTraverseChildrenNodes) return false;

View file

@ -31,7 +31,7 @@ public:
private:
void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
void getSuggestionCandidates(const int inputLength, const int skipPos, const int excessivePos,
void getSuggestionCandidates(const int skipPos, const int excessivePos,
const int transposedPos, int *nextLetters, const int nextLettersSize,
const int maxDepth);
void getVersionNumber();
@ -52,13 +52,15 @@ private:
const int excessivePos, const int transposedPos, int *nextLetters,
const int nextLettersSize);
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
// Returns the score for a candidate word: freq * snr, demoted for each correction
// in effect (skipPos / excessivePos / transposedPos >= 0) and multiplied by
// FULL_WORD_MULTIPLIER when sameLength is true and skipPos < 0.
int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos,
const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
const int mInputLength, const int depth, const int snr, int *nextLetters,
const int inputIndex, const int depth, const int snr, int *nextLetters,
const int nextLettersSize, const int skipPos, const int excessivePos,
const int transposedPos, const int freq);
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
const int snr, const int skipPos, const int excessivePos, const int transposedPos,
const int freq, const int addedWeight);
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
const int inputIndex, const int depth, const int snr, const int skipPos,
const int excessivePos, const int transposedPos, const int freq, const int addedWeight);
bool needsToSkipCurrentNode(const unsigned short c,
const int inputIndex, const int skipPos, const int depth);
int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos,