Merge "Suggest words with excessive chars out of proximity chars Bug: 3273807"
This commit is contained in:
commit
4e8dc88407
3 changed files with 61 additions and 46 deletions
|
@ -67,6 +67,7 @@
|
||||||
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75
|
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
|
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
|
||||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
||||||
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
|
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
|
||||||
|
|
||||||
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
||||||
|
@ -75,7 +76,10 @@
|
||||||
|
|
||||||
#define MAX_DEPTH_MULTIPLIER 3
|
#define MAX_DEPTH_MULTIPLIER 3
|
||||||
|
|
||||||
#define MIN_SUGGEST_DEPTH 2
|
// Minimum suggest depth for one word for all cases except for missing space suggestions.
|
||||||
|
#define MIN_SUGGEST_DEPTH 1
|
||||||
|
#define MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION 3
|
||||||
|
#define MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION 3
|
||||||
|
|
||||||
#define min(a,b) ((a)<(b)?(a):(b))
|
#define min(a,b) ((a)<(b)?(a):(b))
|
||||||
|
|
||||||
|
|
|
@ -45,24 +45,25 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
|
||||||
int *frequencies, int *nextLetters, int nextLettersSize)
|
int *frequencies, int *nextLetters, int nextLettersSize)
|
||||||
{
|
{
|
||||||
initSuggestions(codes, codesSize, outWords, frequencies);
|
initSuggestions(codes, codesSize, outWords, frequencies);
|
||||||
|
if (DEBUG_DICT) assert(codesSize == mInputLength);
|
||||||
|
|
||||||
const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
const int MAX_DEPTH = min(mInputLength * MAX_DEPTH_MULTIPLIER, MAX_WORD_LENGTH);
|
||||||
getSuggestionCandidates(codesSize, -1, -1, -1, nextLetters, nextLettersSize, MAX_DEPTH);
|
getSuggestionCandidates(-1, -1, -1, nextLetters, nextLettersSize, MAX_DEPTH);
|
||||||
|
|
||||||
// Suggestion with missing character
|
// Suggestion with missing character
|
||||||
if (SUGGEST_WORDS_WITH_MISSING_CHARACTER) {
|
if (SUGGEST_WORDS_WITH_MISSING_CHARACTER) {
|
||||||
for (int i = 0; i < codesSize; ++i) {
|
for (int i = 0; i < codesSize; ++i) {
|
||||||
if (DEBUG_DICT) LOGI("--- Suggest missing characters %d", i);
|
if (DEBUG_DICT) LOGI("--- Suggest missing characters %d", i);
|
||||||
getSuggestionCandidates(codesSize, i, -1, -1, NULL, 0, MAX_DEPTH);
|
getSuggestionCandidates(i, -1, -1, NULL, 0, MAX_DEPTH);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Suggestion with excessive character
|
// Suggestion with excessive character
|
||||||
if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER && mInputLength > MIN_SUGGEST_DEPTH) {
|
if (SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER
|
||||||
|
&& mInputLength >= MIN_USER_TYPED_LENGTH_FOR_EXCESSIVE_CHARACTER_SUGGESTION) {
|
||||||
for (int i = 0; i < codesSize; ++i) {
|
for (int i = 0; i < codesSize; ++i) {
|
||||||
if (existsAdjacentProximityChars(i, codesSize)) {
|
|
||||||
if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i);
|
if (DEBUG_DICT) LOGI("--- Suggest excessive characters %d", i);
|
||||||
getSuggestionCandidates(codesSize, -1, i, -1, NULL, 0, MAX_DEPTH);
|
getSuggestionCandidates(-1, i, -1, NULL, 0, MAX_DEPTH);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -71,12 +72,13 @@ int UnigramDictionary::getSuggestions(int *codes, int codesSize, unsigned short
|
||||||
if (SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS) {
|
if (SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS) {
|
||||||
for (int i = 0; i < codesSize; ++i) {
|
for (int i = 0; i < codesSize; ++i) {
|
||||||
if (DEBUG_DICT) LOGI("--- Suggest transposed characters %d", i);
|
if (DEBUG_DICT) LOGI("--- Suggest transposed characters %d", i);
|
||||||
getSuggestionCandidates(codesSize, -1, -1, i, NULL, 0, mInputLength - 1);
|
getSuggestionCandidates(-1, -1, i, NULL, 0, mInputLength - 1);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Suggestions with missing space
|
// Suggestions with missing space
|
||||||
if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER && mInputLength > MIN_SUGGEST_DEPTH) {
|
if (SUGGEST_WORDS_WITH_MISSING_SPACE_CHARACTER
|
||||||
|
&& mInputLength >= MIN_USER_TYPED_LENGTH_FOR_MISSING_SPACE_SUGGESTION) {
|
||||||
for (int i = 1; i < codesSize; ++i) {
|
for (int i = 1; i < codesSize; ++i) {
|
||||||
if (DEBUG_DICT) LOGI("--- Suggest missing space characters %d", i);
|
if (DEBUG_DICT) LOGI("--- Suggest missing space characters %d", i);
|
||||||
getMissingSpaceWords(mInputLength, i);
|
getMissingSpaceWords(mInputLength, i);
|
||||||
|
@ -196,13 +198,15 @@ bool UnigramDictionary::sameAsTyped(unsigned short *word, int length) {
|
||||||
static const char QUOTE = '\'';
|
static const char QUOTE = '\'';
|
||||||
static const char SPACE = ' ';
|
static const char SPACE = ' ';
|
||||||
|
|
||||||
void UnigramDictionary::getSuggestionCandidates(const int inputLength, const int skipPos,
|
void UnigramDictionary::getSuggestionCandidates(const int skipPos,
|
||||||
const int excessivePos, const int transposedPos, int *nextLetters,
|
const int excessivePos, const int transposedPos, int *nextLetters,
|
||||||
const int nextLettersSize, const int maxDepth) {
|
const int nextLettersSize, const int maxDepth) {
|
||||||
if (DEBUG_DICT) LOGI("getSuggestionCandidates %d", maxDepth);
|
if (DEBUG_DICT) {
|
||||||
if (DEBUG_DICT) assert(transposedPos + 1 < inputLength);
|
LOGI("getSuggestionCandidates %d", maxDepth);
|
||||||
if (DEBUG_DICT) assert(excessivePos < inputLength);
|
assert(transposedPos + 1 < mInputLength);
|
||||||
if (DEBUG_DICT) assert(missingPos < inputLength);
|
assert(excessivePos < mInputLength);
|
||||||
|
assert(missingPos < mInputLength);
|
||||||
|
}
|
||||||
int rootPosition = ROOT_POS;
|
int rootPosition = ROOT_POS;
|
||||||
// Get the number of children of the root, then increment the position
|
// Get the number of children of the root, then increment the position
|
||||||
int childCount = Dictionary::getCount(DICT, &rootPosition);
|
int childCount = Dictionary::getCount(DICT, &rootPosition);
|
||||||
|
@ -321,41 +325,46 @@ void UnigramDictionary::getWordsRec(const int childrenCount, const int pos, cons
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
|
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int snr,
|
||||||
unsigned short *word, const int inputLength, const int depth, const int snr,
|
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
|
||||||
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
|
const bool sameLength) {
|
||||||
const int transposedPos, const int freq) {
|
|
||||||
int finalFreq = freq * snr;
|
|
||||||
// TODO: Demote by edit distance
|
// TODO: Demote by edit distance
|
||||||
|
int finalFreq = freq * snr;
|
||||||
if (skipPos >= 0) finalFreq = finalFreq * WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE / 100;
|
if (skipPos >= 0) finalFreq = finalFreq * WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE / 100;
|
||||||
if (excessivePos >= 0) finalFreq = finalFreq
|
|
||||||
* WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE / 100;
|
|
||||||
if (transposedPos >= 0) finalFreq = finalFreq
|
if (transposedPos >= 0) finalFreq = finalFreq
|
||||||
* WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE / 100;
|
* WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE / 100;
|
||||||
|
if (excessivePos >= 0) {
|
||||||
|
finalFreq = finalFreq * WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE / 100;
|
||||||
|
if (!existsAdjacentProximityChars(inputIndex, mInputLength)) {
|
||||||
|
finalFreq = finalFreq
|
||||||
|
* WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE / 100;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
||||||
|
return finalFreq;
|
||||||
|
}
|
||||||
|
|
||||||
|
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLength(
|
||||||
|
unsigned short *word, const int inputIndex, const int depth, const int snr,
|
||||||
|
int *nextLetters, const int nextLettersSize, const int skipPos, const int excessivePos,
|
||||||
|
const int transposedPos, const int freq) {
|
||||||
|
const int finalFreq = calculateFinalFreq(inputIndex, snr, skipPos, excessivePos, transposedPos,
|
||||||
|
freq, false);
|
||||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
||||||
if (depth >= inputLength && skipPos < 0) {
|
if (depth >= mInputLength && skipPos < 0) {
|
||||||
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
registerNextLetter(mWord[mInputLength], nextLetters, nextLettersSize);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
||||||
unsigned short *word, const int depth, const int snr, const int skipPos,
|
unsigned short *word, const int inputIndex, const int depth, const int snr,
|
||||||
const int excessivePos, const int transposedPos, const int freq, const int addedWeight) {
|
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
|
||||||
if (!sameAsTyped(word, depth + 1)) {
|
const int addedWeight) {
|
||||||
int finalFreq = freq * snr * addedWeight;
|
if (sameAsTyped(word, depth + 1)) return;
|
||||||
// TODO: Demote by edit distance
|
const int finalFreq = calculateFinalFreq(inputIndex, snr * addedWeight, skipPos,
|
||||||
if (skipPos >= 0) finalFreq = finalFreq * WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE / 100;
|
excessivePos, transposedPos, freq, true);
|
||||||
if (excessivePos >= 0) finalFreq = finalFreq
|
// Proximity collection will promote a word of the same length as what user typed.
|
||||||
* WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE / 100;
|
|
||||||
if (transposedPos >= 0) finalFreq = finalFreq
|
|
||||||
* WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE / 100;
|
|
||||||
|
|
||||||
// Proximity collection will promote a word of the same length as
|
|
||||||
// what user typed.
|
|
||||||
if (skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
|
||||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
|
inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
|
||||||
|
@ -437,7 +446,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
||||||
if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
|
if (traverseAllNodes || needsToSkipCurrentNode(c, inputIndex, skipPos, depth)) {
|
||||||
mWord[depth] = c;
|
mWord[depth] = c;
|
||||||
if (traverseAllNodes && terminal) {
|
if (traverseAllNodes && terminal) {
|
||||||
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, mInputLength, depth,
|
onTerminalWhenUserTypedLengthIsGreaterThanInputLength(mWord, inputIndex, depth,
|
||||||
snr, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos, freq);
|
snr, nextLetters, nextLettersSize, skipPos, excessivePos, transposedPos, freq);
|
||||||
}
|
}
|
||||||
if (!needsToTraverseChildrenNodes) return false;
|
if (!needsToTraverseChildrenNodes) return false;
|
||||||
|
@ -462,7 +471,7 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
||||||
const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1;
|
const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1;
|
||||||
const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
|
const bool isSameAsUserTypedLength = mInputLength == inputIndex + 1;
|
||||||
if (isSameAsUserTypedLength && terminal) {
|
if (isSameAsUserTypedLength && terminal) {
|
||||||
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, depth, snr,
|
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, snr,
|
||||||
skipPos, excessivePos, transposedPos, freq, addedWeight);
|
skipPos, excessivePos, transposedPos, freq, addedWeight);
|
||||||
}
|
}
|
||||||
if (!needsToTraverseChildrenNodes) return false;
|
if (!needsToTraverseChildrenNodes) return false;
|
||||||
|
|
|
@ -31,7 +31,7 @@ public:
|
||||||
|
|
||||||
private:
|
private:
|
||||||
void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
|
void initSuggestions(int *codes, int codesSize, unsigned short *outWords, int *frequencies);
|
||||||
void getSuggestionCandidates(const int inputLength, const int skipPos, const int excessivePos,
|
void getSuggestionCandidates(const int skipPos, const int excessivePos,
|
||||||
const int transposedPos, int *nextLetters, const int nextLettersSize,
|
const int transposedPos, int *nextLetters, const int nextLettersSize,
|
||||||
const int maxDepth);
|
const int maxDepth);
|
||||||
void getVersionNumber();
|
void getVersionNumber();
|
||||||
|
@ -52,13 +52,15 @@ private:
|
||||||
const int excessivePos, const int transposedPos, int *nextLetters,
|
const int excessivePos, const int transposedPos, int *nextLetters,
|
||||||
const int nextLettersSize);
|
const int nextLettersSize);
|
||||||
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
||||||
|
int calculateFinalFreq(const int inputIndex, const int snr, const int skipPos,
|
||||||
|
const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
|
||||||
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
||||||
const int mInputLength, const int depth, const int snr, int *nextLetters,
|
const int inputIndex, const int depth, const int snr, int *nextLetters,
|
||||||
const int nextLettersSize, const int skipPos, const int excessivePos,
|
const int nextLettersSize, const int skipPos, const int excessivePos,
|
||||||
const int transposedPos, const int freq);
|
const int transposedPos, const int freq);
|
||||||
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word, const int depth,
|
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
|
||||||
const int snr, const int skipPos, const int excessivePos, const int transposedPos,
|
const int inputIndex, const int depth, const int snr, const int skipPos,
|
||||||
const int freq, const int addedWeight);
|
const int excessivePos, const int transposedPos, const int freq, const int addedWeight);
|
||||||
bool needsToSkipCurrentNode(const unsigned short c,
|
bool needsToSkipCurrentNode(const unsigned short c,
|
||||||
const int inputIndex, const int skipPos, const int depth);
|
const int inputIndex, const int skipPos, const int depth);
|
||||||
int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos,
|
int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos,
|
||||||
|
|
Loading…
Reference in a new issue