am eaecb56f: Merge "Demote skipped characters matched words with respect to length." into honeycomb-mr1

* commit 'eaecb56f948a4979e72346f6c5c64b56f7bc7bbf':
  Demote skipped characters matched words with respect to length.
This commit is contained in:
Jean Chalard 2011-03-04 22:46:13 -08:00 committed by Android Git Automerger
commit 8867737adf
4 changed files with 47 additions and 16 deletions

View file

@ -230,6 +230,16 @@ public class ExpandableDictionary extends Dictionary {
return (node == null) ? -1 : node.mFrequency; return (node == null) ? -1 : node.mFrequency;
} }
/**
 * Computes the final frequency for a word that was matched by skipping one
 * character of the input.
 *
 * The base score {@code freq * snr} is scaled by
 * {@code (inputLength - 2) / (inputLength - 1)} using integer arithmetic, so
 * the demotion is stronger for short inputs and fades as the input grows.
 *
 * @param freq the frequency of the matched word
 * @param snr the multiplier applied to the frequency
 * @param inputLength the length of the input
 * @return the demoted frequency, or 0 when {@code inputLength} is below 3
 */
private static int computeSkippedWordFinalFreq(int freq, int snr, int inputLength) {
    // The formula is well defined from length 2 upward, but at length 2 the
    // scaling factor is already 0, so everything below 3 is short-circuited
    // to the constant result.
    if (inputLength < 3) {
        return 0;
    }
    final int numerator = freq * snr * (inputLength - 2);
    final int denominator = inputLength - 1;
    return numerator / denominator;
}
/** /**
* Recursively traverse the tree for words that match the input. Input consists of * Recursively traverse the tree for words that match the input. Input consists of
* a list of arrays. Each item in the list is one input character position. An input * a list of arrays. Each item in the list is one input character position. An input
@ -249,6 +259,7 @@ public class ExpandableDictionary extends Dictionary {
* inputIndex * inputIndex
* @param callback the callback class for adding a word * @param callback the callback class for adding a word
*/ */
// TODO: Share this routine with the native code for BinaryDictionary
protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word, protected void getWordsRec(NodeArray roots, final WordComposer codes, final char[] word,
final int depth, boolean completion, int snr, int inputIndex, int skipPos, final int depth, boolean completion, int snr, int inputIndex, int skipPos,
WordCallback callback) { WordCallback callback) {
@ -275,8 +286,14 @@ public class ExpandableDictionary extends Dictionary {
if (completion) { if (completion) {
word[depth] = c; word[depth] = c;
if (terminal) { if (terminal) {
if (!callback.addWord(word, 0, depth + 1, freq * snr, mDicTypeId, final int finalFreq;
DataType.UNIGRAM)) { if (skipPos < 0) {
finalFreq = freq * snr;
} else {
finalFreq = computeSkippedWordFinalFreq(freq, snr, mInputLength);
}
if (!callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
DataType.UNIGRAM)) {
return; return;
} }
} }
@ -307,8 +324,14 @@ public class ExpandableDictionary extends Dictionary {
if (terminal) { if (terminal) {
if (INCLUDE_TYPED_WORD_IF_VALID if (INCLUDE_TYPED_WORD_IF_VALID
|| !same(word, depth + 1, codes.getTypedWord())) { || !same(word, depth + 1, codes.getTypedWord())) {
int finalFreq = freq * snr * addedAttenuation; final int finalFreq;
if (skipPos < 0) finalFreq *= FULL_WORD_FREQ_MULTIPLIER; if (skipPos < 0) {
finalFreq = freq * snr * addedAttenuation
* FULL_WORD_FREQ_MULTIPLIER;
} else {
finalFreq = computeSkippedWordFinalFreq(freq,
snr * addedAttenuation, mInputLength);
}
callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId, callback.addWord(word, 0, depth + 1, finalFreq, mDicTypeId,
DataType.UNIGRAM); DataType.UNIGRAM);
} }

View file

@ -138,7 +138,7 @@ static void prof_out(void) {
#define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true #define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true
// The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent.
#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 100
#define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80
#define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75
#define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75

View file

@ -493,10 +493,17 @@ static inline int capped255MultForFullMatchAccentsOrCapitalizationDifference(con
} }
inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth, inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int depth,
const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos, const int matchWeight, const int skipPos, const int excessivePos, const int transposedPos,
const int freq, const bool sameLength) { const int freq, const bool sameLength) const {
// TODO: Demote by edit distance // TODO: Demote by edit distance
int finalFreq = freq * matchWeight; int finalFreq = freq * matchWeight;
if (skipPos >= 0) multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE, &finalFreq); if (skipPos >= 0) {
if (mInputLength >= 3) {
multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE *
(mInputLength - 2) / (mInputLength - 1), &finalFreq);
} else {
finalFreq = 0;
}
}
if (transposedPos >= 0) multiplyRate( if (transposedPos >= 0) multiplyRate(
WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq); WORDS_WITH_TRANSPOSED_CHARACTERS_DEMOTION_RATE, &finalFreq);
if (excessivePos >= 0) { if (excessivePos >= 0) {
@ -550,7 +557,7 @@ inline bool UnigramDictionary::needsToSkipCurrentNode(const unsigned short c,
} }
inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex, inline bool UnigramDictionary::existsAdjacentProximityChars(const int inputIndex,
const int inputLength) { const int inputLength) const {
if (inputIndex < 0 || inputIndex >= inputLength) return false; if (inputIndex < 0 || inputIndex >= inputLength) return false;
const int currentChar = *getInputCharsAt(inputIndex); const int currentChar = *getInputCharsAt(inputIndex);
const int leftIndex = inputIndex - 1; const int leftIndex = inputIndex - 1;

View file

@ -75,7 +75,8 @@ private:
const int nextLettersSize); const int nextLettersSize);
void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize); void registerNextLetter(unsigned short c, int *nextLetters, int nextLettersSize);
int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos, int calculateFinalFreq(const int inputIndex, const int depth, const int snr, const int skipPos,
const int excessivePos, const int transposedPos, const int freq, const bool sameLength); const int excessivePos, const int transposedPos, const int freq,
const bool sameLength) const;
void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word, void onTerminalWhenUserTypedLengthIsGreaterThanInputLength(unsigned short *word,
const int inputIndex, const int depth, const int snr, int *nextLetters, const int inputIndex, const int depth, const int snr, int *nextLetters,
const int nextLettersSize, const int skipPos, const int excessivePos, const int nextLettersSize, const int skipPos, const int excessivePos,
@ -99,8 +100,8 @@ private:
bool processCurrentNodeForExactMatch(const int firstChildPos, bool processCurrentNodeForExactMatch(const int firstChildPos,
const int startInputIndex, const int depth, unsigned short *word, const int startInputIndex, const int depth, unsigned short *word,
int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos); int *newChildPosition, int *newCount, bool *newTerminal, int *newFreq, int *siblingPos);
bool existsAdjacentProximityChars(const int inputIndex, const int inputLength); bool existsAdjacentProximityChars(const int inputIndex, const int inputLength) const;
inline const int* getInputCharsAt(const int index) { inline const int* getInputCharsAt(const int index) const {
return mInputCodes + (index * MAX_PROXIMITY_CHARS); return mInputCodes + (index * MAX_PROXIMITY_CHARS);
} }
const unsigned char *DICT; const unsigned char *DICT;