Promote full matches with differing accents.
Stop considering accented characters as different from their base character for proximity scoring. Also give a huge boost (basically overriding frequency) to a word fully matched with only differing accents.

Bug: 2550587
Change-Id: I2da7a71229fb3868d9e4a53703ccf8caeb6fcf10
main
parent
588d2a525c
commit
8dc754a411
|
@ -129,6 +129,7 @@ static void prof_out(void) {
|
||||||
#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
|
#define SUGGEST_WORDS_WITH_EXCESSIVE_CHARACTER true
|
||||||
#define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true
|
#define SUGGEST_WORDS_WITH_TRANSPOSED_CHARACTERS true
|
||||||
|
|
||||||
|
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
|
||||||
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75
|
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
|
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
|
||||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
||||||
|
@ -136,6 +137,9 @@ static void prof_out(void) {
|
||||||
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
|
#define WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE 60
|
||||||
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
|
#define FULL_MATCHED_WORDS_PROMOTION_RATE 120
|
||||||
|
|
||||||
|
// This is used as a bare multiplier (not subject to /100)
|
||||||
|
#define FULL_MATCH_ACCENTS_OR_CAPITALIZATION_DIFFER_MULTIPLIER 2
|
||||||
|
|
||||||
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
// This should be greater than or equal to MAX_WORD_LENGTH defined in BinaryDictionary.java
|
||||||
// This is only used for the size of array. Not to be used in c functions.
|
// This is only used for the size of array. Not to be used in c functions.
|
||||||
#define MAX_WORD_LENGTH_INTERNAL 48
|
#define MAX_WORD_LENGTH_INTERNAL 48
|
||||||
|
|
|
@ -363,10 +363,15 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int
|
||||||
}
|
}
|
||||||
int lengthFreq = TYPED_LETTER_MULTIPLIER;
|
int lengthFreq = TYPED_LETTER_MULTIPLIER;
|
||||||
for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
|
for (int i = 0; i < depth; ++i) lengthFreq *= TYPED_LETTER_MULTIPLIER;
|
||||||
if (depth > 1 && lengthFreq == snr) {
|
if (lengthFreq == snr) {
|
||||||
|
if (depth > 1) {
|
||||||
if (DEBUG_DICT) LOGI("Found full matched word.");
|
if (DEBUG_DICT) LOGI("Found full matched word.");
|
||||||
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
|
multiplyRate(FULL_MATCHED_WORDS_PROMOTION_RATE, &finalFreq);
|
||||||
}
|
}
|
||||||
|
if (sameLength && transposedPos < 0 && skipPos < 0 && excessivePos < 0) {
|
||||||
|
finalFreq *= FULL_MATCH_ACCENTS_OR_CAPITALIZATION_DIFFER_MULTIPLIER;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
if (sameLength && skipPos < 0) finalFreq *= FULL_WORD_MULTIPLIER;
|
||||||
return finalFreq;
|
return finalFreq;
|
||||||
}
|
}
|
||||||
|
@ -385,10 +390,9 @@ inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsGreaterThanInputLe
|
||||||
|
|
||||||
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
inline void UnigramDictionary::onTerminalWhenUserTypedLengthIsSameAsInputLength(
|
||||||
unsigned short *word, const int inputIndex, const int depth, const int snr,
|
unsigned short *word, const int inputIndex, const int depth, const int snr,
|
||||||
const int skipPos, const int excessivePos, const int transposedPos, const int freq,
|
const int skipPos, const int excessivePos, const int transposedPos, const int freq) {
|
||||||
const int addedWeight) {
|
|
||||||
if (sameAsTyped(word, depth + 1)) return;
|
if (sameAsTyped(word, depth + 1)) return;
|
||||||
const int finalFreq = calculateFinalFreq(inputIndex, depth, snr * addedWeight, skipPos,
|
const int finalFreq = calculateFinalFreq(inputIndex, depth, snr, skipPos,
|
||||||
excessivePos, transposedPos, freq, true);
|
excessivePos, transposedPos, freq, true);
|
||||||
// Proximity collection will promote a word of the same length as what user typed.
|
// Proximity collection will promote a word of the same length as what user typed.
|
||||||
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
if (depth >= MIN_SUGGEST_DEPTH) addWord(word, depth + 1, finalFreq);
|
||||||
|
@ -424,9 +428,9 @@ inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
|
inline UnigramDictionary::ProximityType UnigramDictionary::getMatchedProximityId(
|
||||||
const unsigned short c, const int skipPos, const int excessivePos,
|
const int *currentChars, const unsigned short c, const int skipPos,
|
||||||
const int transposedPos) {
|
const int excessivePos, const int transposedPos) {
|
||||||
const unsigned short lowerC = toLowerCase(c);
|
const unsigned short lowerC = toLowerCase(c);
|
||||||
int j = 0;
|
int j = 0;
|
||||||
while (currentChars[j] > 0 && j < MAX_PROXIMITY_CHARS) {
|
while (currentChars[j] > 0 && j < MAX_PROXIMITY_CHARS) {
|
||||||
|
@ -434,18 +438,19 @@ inline int UnigramDictionary::getMatchedProximityId(const int *currentChars,
|
||||||
// If skipPos is defined, not to search proximity collections.
|
// If skipPos is defined, not to search proximity collections.
|
||||||
// First char is what user typed.
|
// First char is what user typed.
|
||||||
if (matched) {
|
if (matched) {
|
||||||
return j;
|
if (j > 0) return NEAR_PROXIMITY_CHAR;
|
||||||
|
return SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR;
|
||||||
} else if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0) {
|
} else if (skipPos >= 0 || excessivePos >= 0 || transposedPos >= 0) {
|
||||||
// Not to check proximity characters
|
// Not to check proximity characters
|
||||||
return -1;
|
return UNRELATED_CHAR;
|
||||||
}
|
}
|
||||||
++j;
|
++j;
|
||||||
}
|
}
|
||||||
return -1;
|
return UNRELATED_CHAR;
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
|
inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth,
|
||||||
const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
|
const int maxDepth, const bool traverseAllNodes, int snr, int inputIndex,
|
||||||
const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
|
const int diffs, const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
|
int *nextLetters, const int nextLettersSize, int *newCount, int *newChildPosition,
|
||||||
bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
|
bool *newTraverseAllNodes, int *newSnr, int*newInputIndex, int *newDiffs,
|
||||||
|
@ -492,22 +497,24 @@ inline bool UnigramDictionary::processCurrentNode(const int pos, const int depth
|
||||||
|
|
||||||
int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos,
|
int matchedProximityCharId = getMatchedProximityId(currentChars, c, skipPos, excessivePos,
|
||||||
transposedPos);
|
transposedPos);
|
||||||
if (matchedProximityCharId < 0) return false;
|
if (UNRELATED_CHAR == matchedProximityCharId) return false;
|
||||||
mWord[depth] = c;
|
mWord[depth] = c;
|
||||||
// If inputIndex is greater than mInputLength, that means there is no
|
// If inputIndex is greater than mInputLength, that means there is no
|
||||||
// proximity chars. So, we don't need to check proximity.
|
// proximity chars. So, we don't need to check proximity.
|
||||||
const int addedWeight = matchedProximityCharId == 0 ? TYPED_LETTER_MULTIPLIER : 1;
|
if (SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR == matchedProximityCharId) {
|
||||||
|
snr = snr * TYPED_LETTER_MULTIPLIER;
|
||||||
|
}
|
||||||
bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
|
bool isSameAsUserTypedLength = mInputLength == inputIndex + 1
|
||||||
|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
|
|| (excessivePos == mInputLength - 1 && inputIndex == mInputLength - 2);
|
||||||
if (isSameAsUserTypedLength && terminal) {
|
if (isSameAsUserTypedLength && terminal) {
|
||||||
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, snr,
|
onTerminalWhenUserTypedLengthIsSameAsInputLength(mWord, inputIndex, depth, snr,
|
||||||
skipPos, excessivePos, transposedPos, freq, addedWeight);
|
skipPos, excessivePos, transposedPos, freq);
|
||||||
}
|
}
|
||||||
if (!needsToTraverseChildrenNodes) return false;
|
if (!needsToTraverseChildrenNodes) return false;
|
||||||
// Start traversing all nodes after the index exceeds the user typed length
|
// Start traversing all nodes after the index exceeds the user typed length
|
||||||
*newTraverseAllNodes = isSameAsUserTypedLength;
|
*newTraverseAllNodes = isSameAsUserTypedLength;
|
||||||
*newSnr = snr * addedWeight;
|
*newSnr = snr;
|
||||||
*newDiffs = diffs + ((matchedProximityCharId > 0) ? 1 : 0);
|
*newDiffs = diffs + ((NEAR_PROXIMITY_CHAR == matchedProximityCharId) ? 1 : 0);
|
||||||
*newInputIndex = inputIndex + 1;
|
*newInputIndex = inputIndex + 1;
|
||||||
}
|
}
|
||||||
// Optimization: Prune out words that are too long compared to how much was typed.
|
// Optimization: Prune out words that are too long compared to how much was typed.
|
||||||
|
|
|
@ -22,6 +22,13 @@
|
||||||
namespace latinime {
|
namespace latinime {
|
||||||
|
|
||||||
class UnigramDictionary {
|
class UnigramDictionary {
|
||||||
|
|
||||||
|
typedef enum { // Used as a return value for character comparison
|
||||||
|
SAME_OR_ACCENTED_OR_CAPITALIZED_CHAR, // Same char, possibly with different case or accent
|
||||||
|
NEAR_PROXIMITY_CHAR, // It is a char located nearby on the keyboard
|
||||||
|
UNRELATED_CHAR // It is an unrelated char
|
||||||
|
} ProximityType;
|
||||||
|
|
||||||
public:
|
public:
|
||||||
UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier,
|
UnigramDictionary(const unsigned char *dict, int typedLetterMultipler, int fullWordMultiplier,
|
||||||
int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion);
|
int maxWordLength, int maxWords, int maxProximityChars, const bool isLatestDictVersion);
|
||||||
|
@ -60,11 +67,11 @@ private:
|
||||||
const int transposedPos, const int freq);
|
const int transposedPos, const int freq);
|
||||||
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
|
void onTerminalWhenUserTypedLengthIsSameAsInputLength(unsigned short *word,
|
||||||
const int inputIndex, const int depth, const int snr, const int skipPos,
|
const int inputIndex, const int depth, const int snr, const int skipPos,
|
||||||
const int excessivePos, const int transposedPos, const int freq, const int addedWeight);
|
const int excessivePos, const int transposedPos, const int freq);
|
||||||
bool needsToSkipCurrentNode(const unsigned short c,
|
bool needsToSkipCurrentNode(const unsigned short c,
|
||||||
const int inputIndex, const int skipPos, const int depth);
|
const int inputIndex, const int skipPos, const int depth);
|
||||||
int getMatchedProximityId(const int *currentChars, const unsigned short c, const int skipPos,
|
ProximityType getMatchedProximityId(const int *currentChars, const unsigned short c,
|
||||||
const int excessivePos, const int transposedPos);
|
const int skipPos, const int excessivePos, const int transposedPos);
|
||||||
// Process a node by considering proximity, missing and excessive character
|
// Process a node by considering proximity, missing and excessive character
|
||||||
bool processCurrentNode(const int pos, const int depth,
|
bool processCurrentNode(const int pos, const int depth,
|
||||||
const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
|
const int maxDepth, const bool traverseAllNodes, const int snr, int inputIndex,
|
||||||
|
|
Loading…
Reference in New Issue