Merge "Apply distracter filter for in-vocabulary words."
This commit is contained in:
commit
f85b478cd8
1 changed file with 13 additions and 16 deletions
|
@ -130,37 +130,27 @@ public final class LanguageModelParam {
|
||||||
if (locale == null) {
|
if (locale == null) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
// TODO: Though targetWord is an IV (in-vocabulary) word, we should still apply
|
|
||||||
// distracterFilter in the following code. If targetWord is a distracter,
|
|
||||||
// it should be filtered out.
|
|
||||||
if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) {
|
if (dictionaryFacilitator.isValidWord(targetWord, false /* ignoreCase */)) {
|
||||||
return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp,
|
return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp,
|
||||||
true /* isValidWord */, locale);
|
true /* isValidWord */, locale, distracterFilter);
|
||||||
}
|
}
|
||||||
|
|
||||||
final String lowerCaseTargetWord = targetWord.toLowerCase(locale);
|
final String lowerCaseTargetWord = targetWord.toLowerCase(locale);
|
||||||
if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) {
|
if (dictionaryFacilitator.isValidWord(lowerCaseTargetWord, false /* ignoreCase */)) {
|
||||||
// Add the lower-cased word.
|
// Add the lower-cased word.
|
||||||
return createAndGetLanguageModelParamOfWord(prevWordsInfo, lowerCaseTargetWord,
|
return createAndGetLanguageModelParamOfWord(prevWordsInfo, lowerCaseTargetWord,
|
||||||
timestamp, true /* isValidWord */, locale);
|
timestamp, true /* isValidWord */, locale, distracterFilter);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Treat the word as an OOV word. The following statement checks whether this OOV
|
// Treat the word as an OOV word.
|
||||||
// is a distracter to words in dictionaries. Being a distracter means the OOV word is
|
|
||||||
// too close to a common word in dictionaries (e.g., the OOV "mot" is very close to "not").
|
|
||||||
// Adding such a word to dictionaries would interfere with entering in-dictionary words. For
|
|
||||||
// example, adding "mot" to dictionaries might interfere with entering "not".
|
|
||||||
// This kind of OOV should be filtered out.
|
|
||||||
if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, targetWord, locale)) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp,
|
return createAndGetLanguageModelParamOfWord(prevWordsInfo, targetWord, timestamp,
|
||||||
false /* isValidWord */, locale);
|
false /* isValidWord */, locale, distracterFilter);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static LanguageModelParam createAndGetLanguageModelParamOfWord(
|
private static LanguageModelParam createAndGetLanguageModelParamOfWord(
|
||||||
final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp,
|
final PrevWordsInfo prevWordsInfo, final String targetWord, final int timestamp,
|
||||||
final boolean isValidWord, final Locale locale) {
|
final boolean isValidWord, final Locale locale,
|
||||||
|
final DistracterFilter distracterFilter) {
|
||||||
final String word;
|
final String word;
|
||||||
if (StringUtils.getCapitalizationType(targetWord) == StringUtils.CAPITALIZE_FIRST
|
if (StringUtils.getCapitalizationType(targetWord) == StringUtils.CAPITALIZE_FIRST
|
||||||
&& prevWordsInfo.mPrevWord == null && !isValidWord) {
|
&& prevWordsInfo.mPrevWord == null && !isValidWord) {
|
||||||
|
@ -168,6 +158,13 @@ public final class LanguageModelParam {
|
||||||
} else {
|
} else {
|
||||||
word = targetWord;
|
word = targetWord;
|
||||||
}
|
}
|
||||||
|
// Check whether the word is a distracter to words in the dictionaries.
|
||||||
|
if (distracterFilter.isDistracterToWordsInDictionaries(prevWordsInfo, word, locale)) {
|
||||||
|
if (DEBUG) {
|
||||||
|
Log.d(TAG, "The word (" + word + ") is a distracter. Skip this word.");
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
final int unigramProbability = isValidWord ?
|
final int unigramProbability = isValidWord ?
|
||||||
UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD;
|
UNIGRAM_PROBABILITY_FOR_VALID_WORD : UNIGRAM_PROBABILITY_FOR_OOV_WORD;
|
||||||
if (prevWordsInfo.mPrevWord == null) {
|
if (prevWordsInfo.mPrevWord == null) {
|
||||||
|
|
Loading…
Reference in a new issue