Prepare to support n-grams in the user history dictionary.

Bug:17097992
Change-Id: Ic8bfde3d4cc0e720bf7681e08e16fb2ad94d5670
This commit is contained in:
Keisuke Kuroyanagi 2014-09-22 18:18:50 +09:00
parent b51412a05c
commit ddfaeff544
3 changed files with 33 additions and 12 deletions

View file

@ -126,6 +126,13 @@ public class PrevWordsInfo {
} }
} }
/**
 * Creates a PrevWordsInfo limited to the leading entries of {@code mPrevWordsInfo}.
 *
 * @param maxPrevWordCount upper bound on the number of entries to keep.
 * @return a new PrevWordsInfo wrapping a copy of the first
 *         min(maxPrevWordCount, mPrevWordsInfo.length) entries.
 */
public PrevWordsInfo getTrimmedPrevWordsInfo(final int maxPrevWordCount) {
    final int availableCount = mPrevWordsInfo.length;
    // Never keep more entries than are actually stored.
    final int keptCount = maxPrevWordCount < availableCount ? maxPrevWordCount : availableCount;
    // TODO: Quit creating a new array.
    final WordInfo[] leadingEntries = Arrays.copyOf(mPrevWordsInfo, keptCount);
    return new PrevWordsInfo(leadingEntries);
}
public int getPrevWordCount() { public int getPrevWordCount() {
return mPrevWordsInfo.length; return mPrevWordsInfo.length;
} }

View file

@ -35,6 +35,7 @@ import java.util.Locale;
*/ */
public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBase { public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBase {
/* package */ static final String NAME = UserHistoryDictionary.class.getSimpleName(); /* package */ static final String NAME = UserHistoryDictionary.class.getSimpleName();
private final static int SUPPORTED_NGRAM = 2; // TODO: 3
// TODO: Make this constructor private // TODO: Make this constructor private
/* package */ UserHistoryDictionary(final Context context, final Locale locale) { /* package */ UserHistoryDictionary(final Context context, final Locale locale) {
@ -61,9 +62,7 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas
public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary, public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary,
final PrevWordsInfo prevWordsInfo, final String word, final boolean isValid, final PrevWordsInfo prevWordsInfo, final String word, final boolean isValid,
final int timestamp, final DistracterFilter distracterFilter) { final int timestamp, final DistracterFilter distracterFilter) {
final CharSequence prevWord = prevWordsInfo.mPrevWordsInfo[0].mWord; if (word.length() > Constants.DICTIONARY_MAX_WORD_LENGTH) {
if (word.length() > Constants.DICTIONARY_MAX_WORD_LENGTH ||
(prevWord != null && prevWord.length() > Constants.DICTIONARY_MAX_WORD_LENGTH)) {
return; return;
} }
final int frequency = isValid ? final int frequency = isValid ?
@ -71,17 +70,29 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas
userHistoryDictionary.addUnigramEntryWithCheckingDistracter(word, frequency, userHistoryDictionary.addUnigramEntryWithCheckingDistracter(word, frequency,
null /* shortcutTarget */, 0 /* shortcutFreq */, false /* isNotAWord */, null /* shortcutTarget */, 0 /* shortcutFreq */, false /* isNotAWord */,
false /* isBlacklisted */, timestamp, distracterFilter); false /* isBlacklisted */, timestamp, distracterFilter);
// Do not insert a word as a bigram of itself
if (TextUtils.equals(word, prevWord)) { final boolean isBeginningOfSentenceContext =
return; prevWordsInfo.mPrevWordsInfo[0].mIsBeginningOfSentence;
} final PrevWordsInfo prevWordsInfoToBeSaved =
if (null != prevWord) { prevWordsInfo.getTrimmedPrevWordsInfo(SUPPORTED_NGRAM - 1);
if (prevWordsInfo.mPrevWordsInfo[0].mIsBeginningOfSentence) { for (int i = 0; i < prevWordsInfoToBeSaved.getPrevWordCount(); i++) {
// Beginning-of-Sentence n-gram entry is treated as a n-gram entry of invalid word. final CharSequence prevWord = prevWordsInfoToBeSaved.mPrevWordsInfo[i].mWord;
userHistoryDictionary.addNgramEntry(prevWordsInfo, word, if (prevWord == null || (prevWord.length() > Constants.DICTIONARY_MAX_WORD_LENGTH)) {
return;
}
// Do not insert a word as a bigram of itself
if (i == 0 && TextUtils.equals(word, prevWord)) {
return;
}
if (isBeginningOfSentenceContext) {
// Beginning-of-Sentence n-gram entry is added as an n-gram entry of an OOV word.
userHistoryDictionary.addNgramEntry(
prevWordsInfoToBeSaved.getTrimmedPrevWordsInfo(i + 1), word,
FREQUENCY_FOR_WORDS_NOT_IN_DICTS, timestamp); FREQUENCY_FOR_WORDS_NOT_IN_DICTS, timestamp);
} else { } else {
userHistoryDictionary.addNgramEntry(prevWordsInfo, word, frequency, timestamp); userHistoryDictionary.addNgramEntry(
prevWordsInfoToBeSaved.getTrimmedPrevWordsInfo(i + 1), word, frequency,
timestamp);
} }
} }
} }

View file

@ -166,6 +166,9 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordI
for (const auto entry : languageModelDictContent->getProbabilityEntries( for (const auto entry : languageModelDictContent->getProbabilityEntries(
prevWordIds.limit(i))) { prevWordIds.limit(i))) {
const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry(); const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry();
if (!probabilityEntry.isValid()) {
continue;
}
const int probability = probabilityEntry.hasHistoricalInfo() ? const int probability = probabilityEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability( ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mHeaderPolicy) probabilityEntry.getHistoricalInfo(), mHeaderPolicy)