From dc5301e5902826b0e9a44fc3d45695526eaf66b3 Mon Sep 17 00:00:00 2001 From: satok Date: Mon, 11 Apr 2011 16:14:45 +0900 Subject: [PATCH] Change the formula of the missing character. - Bug: 4271049 - Due to the result of the recent user study, a word with a missing character needs to be promoted a bit, so I changed the formula from: - freq * 70 * (n - 2) / (n - 1) to: - freq * 90 * (10n - 12) / (10n - 2) Change-Id: Ibff72cbdb0f2d7b91460a06a0fd39a9f5749aa46 --- native/src/defines.h | 3 ++- native/src/unigram_dictionary.cpp | 9 ++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/native/src/defines.h b/native/src/defines.h index 926120703..ff195f40c 100644 --- a/native/src/defines.h +++ b/native/src/defines.h @@ -138,7 +138,8 @@ static void prof_out(void) { #define SUGGEST_WORDS_WITH_SPACE_PROXIMITY true // The following "rate"s are used as a multiplier before dividing by 100, so they are in percent. -#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 70 +#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE 90 +#define WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X 12 #define WORDS_WITH_MISSING_SPACE_CHARACTER_DEMOTION_RATE 80 #define WORDS_WITH_EXCESSIVE_CHARACTER_DEMOTION_RATE 75 #define WORDS_WITH_EXCESSIVE_CHARACTER_OUT_OF_PROXIMITY_DEMOTION_RATE 75 diff --git a/native/src/unigram_dictionary.cpp b/native/src/unigram_dictionary.cpp index c18829014..2ae2bca92 100644 --- a/native/src/unigram_dictionary.cpp +++ b/native/src/unigram_dictionary.cpp @@ -518,9 +518,12 @@ inline int UnigramDictionary::calculateFinalFreq(const int inputIndex, const int // TODO: Demote by edit distance int finalFreq = freq * matchWeight; if (skipPos >= 0) { - if (mInputLength >= 3) { - multiplyRate(WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE * - (mInputLength - 2) / (mInputLength - 1), &finalFreq); + if (mInputLength >= 2) { + const int demotionRate = WORDS_WITH_MISSING_CHARACTER_DEMOTION_RATE + * (10 * mInputLength - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X) 
+ / (10 * mInputLength + - WORDS_WITH_MISSING_CHARACTER_DEMOTION_START_POS_10X + 10); + multiplyRate(demotionRate, &finalFreq); } else { finalFreq = 0; }