Merge "Demote skipped characters matched words with respect to length." into honeycomb-mr1
commit
eaecb56f94
|
@ -230,6 +230,16 @@ public class ExpandableDictionary extends Dictionary {
|
||||||
return (node == null) ? -1 : node.mFrequency;
|
return (node == null) ? -1 : node.mFrequency;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Computes the final frequency reported for a word that was matched while skipping
 * a character of the input (the callers use it on the branch where skipPos is not
 * negative). The result is freq * snr scaled by (inputLength - 2) / (inputLength - 1),
 * so the demotion depends on the length of the input.
 *
 * The product is fully evaluated in int arithmetic before the single integer division,
 * so the result is truncated toward zero only once, at the end.
 * NOTE(review): freq * snr * (inputLength - 2) is a plain int multiplication; confirm
 * that it cannot overflow for the value ranges the callers pass in.
 *
 * @param freq the frequency of the matched word
 * @param snr the weight accumulated for the word; one caller passes snr * addedAttenuation
 * @param inputLength the length of the input; the callers pass mInputLength
 * @return the demoted frequency, or 0 when inputLength is less than 3
 */
private static int computeSkippedWordFinalFreq(int freq, int snr, int inputLength) {
|
||||||
|
// The computation itself makes sense for >= 2, but the == 2 case returns 0
|
||||||
|
// anyway so we may as well test against 3 instead and return the constant
|
||||||
|
if (inputLength >= 3) {
|
||||||
|
return (freq * snr * (inputLength - 2)) / (inputLength - 1);
|
||||||
|
} else {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Recursively traverse the tree for words that match the input. Input consists of
|
* Recursively traverse the tree for words that match the input. Input consists of
|
||||||
* a list of arrays. Each item in the list is one input character position. An input
|
* a list of arrays. Each item in the list is one input character position. An input
|
||||||
|
@ -243,13 +253,14 @@ public class ExpandableDictionary extends Dictionary {
|
||||||
* @param completion whether the traversal is now in completion mode - meaning that we've
|
* @param completion whether the traversal is now in completion mode - meaning that we've
|
||||||
* exhausted the input and we're looking for all possible suffixes.
|
* exhausted the input and we're looking for all possible suffixes.
|
||||||
* @param snr current weight of the word being formed
|
* @param snr current weight of the word being formed
|
||||||
* @param inputIndex position in the input characters. This can be off from the depth in
|
* @param inputIndex position in the input characters. This can be off from the depth in
|
||||||
* case we skip over some punctuations such as apostrophe in the traversal. That is, if you type
|
* case we skip over some punctuations such as apostrophe in the traversal. That is, if you type
|
||||||
* "wouldve", it could be matching "would've", so the depth will be one more than the
|
* "wouldve", it could be matching "would've", so the depth will be one more than the
|
||||||
* inputIndex
|
* inputIndex
|
||||||
* @param callback the callback class for adding a word
|
* @param callback the callback class for adding a word
|
||||||
*/
|
*/
|
||||||
protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
|
// TODO: Share this routine with the native code for BinaryDictionary
|
||||||
|
protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
|
||||||
final int depth, boolean completion, int snr, int inputIndex, int skipPos,
|
final int depth, boolean completion, int snr, int inputIndex, int skipPos,
|
||||||
WordCallback callback) {
|
WordCallback callback) {
|
||||||
final int count = roots.mLength;
|
final int count = roots.mLength;
|
||||||
|
@ -275,8 +286,14 @@ public class ExpandableDictionary extends Dictionary {
|
||||||
if (completion) {
|
if (completion) {
|
||||||
word[depth] = c;
|
word[depth] = c;
|
||||||
if (terminal) {
|
if (terminal) {
|
||||||
if (!callback.addWord(word, 0, depth + 1, freq * snr, mDicTypeId,
|
final int finalFreq;
|
||||||
DataType.UNIGRAM)) {
|
if (skipPos < 0) {
|
||||||
|
finalFreq = freq * snr;
|
||||||
|
} else {
|
||||||
|
finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength);
|
||||||
|
}
|
||||||
|
if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
|
||||||
|
DataType.UNIGRAM)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -288,7 +305,7 @@ public class ExpandableDictionary extends Dictionary {
|
||||||
// Skip the ' and continue deeper
|
// Skip the ' and continue deeper
|
||||||
word[depth] = c;
|
word[depth] = c;
|
||||||
if (children != null) {
|
if (children != null) {
|
||||||
getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
|
getWordsRec(children, codes, word, depth + 1, completion, snr, inputIndex,
|
||||||
skipPos, callback);
|
skipPos, callback);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
@ -305,10 +322,16 @@ public class ExpandableDictionary extends Dictionary {
|
||||||
|
|
||||||
if (codeSize == inputIndex + 1) {
|
if (codeSize == inputIndex + 1) {
|
||||||
if (terminal) {
|
if (terminal) {
|
||||||
if (INCLUDE_TYPED_WORD_IF_VALID
|
if (INCLUDE_TYPED_WORD_IF_VALID
|
||||||
|| !same(word, depth + 1, codes.getTypedWord())) {
|
|| !same(word, depth + 1, codes.getTypedWord())) {
|
||||||
int finalFreq = freq * snr * addedAttenuation;
|
final int finalFreq;
|
||||||
if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER;
|
if (skipPos < 0) {
|
||||||
|
finalFreq = freq * snr * addedAttenuation
|
||||||
|
* FULL_WORD_FREQ_MULTIPLIER;
|
||||||
|
} else {
|
||||||
|
finalFreq = computeSkippedWordFinalFreq(freq,
|
||||||
|
snr * addedAttenuation, mInputLength);
|
||||||
|
}
|
||||||
callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
|
callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
|
||||||
DataType.UNIGRAM);
|
DataType.UNIGRAM);
|
||||||
}
|
}
|
||||||
|
@ -319,7 +342,7 @@ public class ExpandableDictionary extends Dictionary {
|
||||||
skipPos, callback);
|
skipPos, callback);
|
||||||
}
|
}
|
||||||
} else if (children != null) {
|
} else if (children != null) {
|
||||||
getWordsRec(children, codes, word, depth + 1,
|
getWordsRec(children, codes, word, depth + 1,
|
||||||
false, snr * addedAttenuation, inputIndex + 1,
|
false, snr * addedAttenuation, inputIndex + 1,
|
||||||
skipPos, callback);
|
skipPos, callback);
|
||||||
}
|
}
|
||||||
|
|
|
@ -138,7 +138,7 @@ static void prof_out(void) {
|
||||||
#define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true
|
#define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true
|
||||||
|
|
||||||
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
|
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
|
||||||
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75
|
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 100
|
||||||
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
|
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
|
||||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
|
||||||
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
|
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75
|
||||||
|
|
|
@ -493,10 +493,17 @@ static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(con
|
||||||
}
|
}
|
||||||
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
|
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
|
||||||
const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
|
const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
|
||||||
const int freq, const bool sameLength) {
|
const int freq, const bool sameLength) const {
|
||||||
// TODO: Demote by edit distance
|
// TODO: Demote by edit distance
|
||||||
int finalFreq = freq * matchWeight;
|
int finalFreq = freq * matchWeight;
|
||||||
if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq);
|
if (skipPos >= 0) {
|
||||||
|
if (mInputLength >= 3) {
|
||||||
|
multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE *
|
||||||
|
(mInputLength - 2) / (mInputLength - 1), &finalFreq);
|
||||||
|
} else {
|
||||||
|
finalFreq = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
if (transposedPos >= 0) multiplyRate(
|
if (transposedPos >= 0) multiplyRate(
|
||||||
WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
|
WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
|
||||||
if (excessivePos >= 0) {
|
if (excessivePos >= 0) {
|
||||||
|
@ -550,7 +557,7 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
|
||||||
}
|
}
|
||||||
|
|
||||||
inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,
|
inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,
|
||||||
const int inputLength) {
|
const int inputLength) const {
|
||||||
if (inputIndex < 0 || inputIndex >= inputLength) return false;
|
if (inputIndex < 0 || inputIndex >= inputLength) return false;
|
||||||
const int currentChar = *getInputCharsAt(inputIndex);
|
const int currentChar = *getInputCharsAt(inputIndex);
|
||||||
const int leftIndex = inputIndex - 1;
|
const int leftIndex = inputIndex - 1;
|
||||||
|
|
|
@ -75,7 +75,8 @@ private:
|
||||||
const int nextLettersSize);
|
const int nextLettersSize);
|
||||||
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
|
||||||
int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
|
int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
|
||||||
const int excessivePos, const int transposedPos, const int freq, const bool sameLength);
|
const int excessivePos, const int transposedPos, const int freq,
|
||||||
|
const bool sameLength) const;
|
||||||
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
|
||||||
const int inputIndex, const int depth, const int snr, int *nextLetters,
|
const int inputIndex, const int depth, const int snr, int *nextLetters,
|
||||||
const int nextLettersSize, const int skipPos, const int excessivePos,
|
const int nextLettersSize, const int skipPos, const int excessivePos,
|
||||||
|
@ -99,8 +100,8 @@ private:
|
||||||
bool processCurrentNodeForExactMatch(const int firstChildPos,
|
bool processCurrentNodeForExactMatch(const int firstChildPos,
|
||||||
const int startInputIndex, const int depth, unsigned short *word,
|
const int startInputIndex, const int depth, unsigned short *word,
|
||||||
int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
|
int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
|
||||||
bool existsAdjacentProximityChars(const int inputIndex, const int inputLength);
|
bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const;
|
||||||
inline const int* getInputCharsAt(const int index) {
|
inline const int* getInputCharsAt(const int index) const {
|
||||||
return mInputCodes + (index * MAX_PROXIMITY_CHARS);
|
return mInputCodes + (index * MAX_PROXIMITY_CHARS);
|
||||||
}
|
}
|
||||||
const unsigned char *DICT;
|
const unsigned char *DICT;
|
||||||
|
|
Loading…
Reference in New Issue