am 9997853d: Merge "Prepare supporting n-gram for user history dictionary."

* commit '9997853d6fa5b17402c0a8a490fa3bafc0e58933':
  Prepare supporting n-gram for user history dictionary.
This commit is contained in:
Keisuke Kuroyanagi 2014-09-23 10:27:36 +00:00 committed by Android Git Automerger
commit 61f90fd819
3 changed files with 33 additions and 12 deletions

View file

@ -126,6 +126,13 @@ public class PrevWordsInfo {
} }
} }
/**
 * Builds a new {@code PrevWordsInfo} holding at most {@code maxPrevWordCount} entries.
 *
 * The entries are taken from the start of {@code mPrevWordsInfo}; when this object already
 * holds {@code maxPrevWordCount} entries or fewer, all of them are copied.
 *
 * @param maxPrevWordCount the upper bound on the number of entries in the returned object.
 * @return a new {@code PrevWordsInfo} constructed from a freshly allocated copy of the entries.
 */
public PrevWordsInfo getTrimmedPrevWordsInfo(final int maxPrevWordCount) {
    final int availableCount = mPrevWordsInfo.length;
    // Never ask for more entries than this object actually holds.
    final int trimmedCount =
            (maxPrevWordCount < availableCount) ? maxPrevWordCount : availableCount;
    // TODO: Quit creating a new array.
    final WordInfo[] trimmedWords = Arrays.copyOf(mPrevWordsInfo, trimmedCount);
    return new PrevWordsInfo(trimmedWords);
}
public int getPrevWordCount() { public int getPrevWordCount() {
return mPrevWordsInfo.length; return mPrevWordsInfo.length;
} }

View file

@ -35,6 +35,7 @@ import java.util.Locale;
*/ */
public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBase { public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBase {
/* package */ static final String NAME = UserHistoryDictionary.class.getSimpleName(); /* package */ static final String NAME = UserHistoryDictionary.class.getSimpleName();
private final static int SUPPORTED_NGRAM = 2; // TODO: 3
// TODO: Make this constructor private // TODO: Make this constructor private
/* package */ UserHistoryDictionary(final Context context, final Locale locale) { /* package */ UserHistoryDictionary(final Context context, final Locale locale) {
@ -61,9 +62,7 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas
public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary, public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary,
final PrevWordsInfo prevWordsInfo, final String word, final boolean isValid, final PrevWordsInfo prevWordsInfo, final String word, final boolean isValid,
final int timestamp, final DistracterFilter distracterFilter) { final int timestamp, final DistracterFilter distracterFilter) {
final CharSequence prevWord = prevWordsInfo.mPrevWordsInfo[0].mWord; if (word.length() > Constants.DICTIONARY_MAX_WORD_LENGTH) {
if (word.length() > Constants.DICTIONARY_MAX_WORD_LENGTH ||
(prevWord != null && prevWord.length() > Constants.DICTIONARY_MAX_WORD_LENGTH)) {
return; return;
} }
final int frequency = isValid ? final int frequency = isValid ?
@ -71,17 +70,29 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas
userHistoryDictionary.addUnigramEntryWithCheckingDistracter(word, frequency, userHistoryDictionary.addUnigramEntryWithCheckingDistracter(word, frequency,
null /* shortcutTarget */, 0 /* shortcutFreq */, false /* isNotAWord */, null /* shortcutTarget */, 0 /* shortcutFreq */, false /* isNotAWord */,
false /* isBlacklisted */, timestamp, distracterFilter); false /* isBlacklisted */, timestamp, distracterFilter);
// Do not insert a word as a bigram of itself
if (TextUtils.equals(word, prevWord)) { final boolean isBeginningOfSentenceContext =
prevWordsInfo.mPrevWordsInfo[0].mIsBeginningOfSentence;
final PrevWordsInfo prevWordsInfoToBeSaved =
prevWordsInfo.getTrimmedPrevWordsInfo(SUPPORTED_NGRAM - 1);
for (int i = 0; i < prevWordsInfoToBeSaved.getPrevWordCount(); i++) {
final CharSequence prevWord = prevWordsInfoToBeSaved.mPrevWordsInfo[i].mWord;
if (prevWord == null || (prevWord.length() > Constants.DICTIONARY_MAX_WORD_LENGTH)) {
return; return;
} }
if (null != prevWord) { // Do not insert a word as a bigram of itself
if (prevWordsInfo.mPrevWordsInfo[0].mIsBeginningOfSentence) { if (i == 0 && TextUtils.equals(word, prevWord)) {
// Beginning-of-Sentence n-gram entry is treated as a n-gram entry of invalid word. return;
userHistoryDictionary.addNgramEntry(prevWordsInfo, word, }
if (isBeginningOfSentenceContext) {
// Beginning-of-Sentence n-gram entry is added as an n-gram entry of an OOV word.
userHistoryDictionary.addNgramEntry(
prevWordsInfoToBeSaved.getTrimmedPrevWordsInfo(i + 1), word,
FREQUENCY_FOR_WORDS_NOT_IN_DICTS, timestamp); FREQUENCY_FOR_WORDS_NOT_IN_DICTS, timestamp);
} else { } else {
userHistoryDictionary.addNgramEntry(prevWordsInfo, word, frequency, timestamp); userHistoryDictionary.addNgramEntry(
prevWordsInfoToBeSaved.getTrimmedPrevWordsInfo(i + 1), word, frequency,
timestamp);
} }
} }
} }

View file

@ -166,6 +166,9 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordI
for (const auto entry : languageModelDictContent->getProbabilityEntries( for (const auto entry : languageModelDictContent->getProbabilityEntries(
prevWordIds.limit(i))) { prevWordIds.limit(i))) {
const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry(); const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry();
if (!probabilityEntry.isValid()) {
continue;
}
const int probability = probabilityEntry.hasHistoricalInfo() ? const int probability = probabilityEntry.hasHistoricalInfo() ?
ForgettingCurveUtils::decodeProbability( ForgettingCurveUtils::decodeProbability(
probabilityEntry.getHistoricalInfo(), mHeaderPolicy) probabilityEntry.getHistoricalInfo(), mHeaderPolicy)