am 9997853d: Merge "Prepare supporting n-gram for user history dictionary."
* commit '9997853d6fa5b17402c0a8a490fa3bafc0e58933': Prepare supporting n-gram for user history dictionary.
This commit is contained in:
commit
61f90fd819
3 changed files with 33 additions and 12 deletions
|
@ -126,6 +126,13 @@ public class PrevWordsInfo {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public PrevWordsInfo getTrimmedPrevWordsInfo(final int maxPrevWordCount) {
|
||||||
|
final int newSize = Math.min(maxPrevWordCount, mPrevWordsInfo.length);
|
||||||
|
// TODO: Quit creating a new array.
|
||||||
|
final WordInfo[] prevWordsInfo = Arrays.copyOf(mPrevWordsInfo, newSize);
|
||||||
|
return new PrevWordsInfo(prevWordsInfo);
|
||||||
|
}
|
||||||
|
|
||||||
public int getPrevWordCount() {
|
public int getPrevWordCount() {
|
||||||
return mPrevWordsInfo.length;
|
return mPrevWordsInfo.length;
|
||||||
}
|
}
|
||||||
|
|
|
@ -35,6 +35,7 @@ import java.util.Locale;
|
||||||
*/
|
*/
|
||||||
public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBase {
|
public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBase {
|
||||||
/* package */ static final String NAME = UserHistoryDictionary.class.getSimpleName();
|
/* package */ static final String NAME = UserHistoryDictionary.class.getSimpleName();
|
||||||
|
private final static int SUPPORTED_NGRAM = 2; // TODO: 3
|
||||||
|
|
||||||
// TODO: Make this constructor private
|
// TODO: Make this constructor private
|
||||||
/* package */ UserHistoryDictionary(final Context context, final Locale locale) {
|
/* package */ UserHistoryDictionary(final Context context, final Locale locale) {
|
||||||
|
@ -61,9 +62,7 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas
|
||||||
public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary,
|
public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary,
|
||||||
final PrevWordsInfo prevWordsInfo, final String word, final boolean isValid,
|
final PrevWordsInfo prevWordsInfo, final String word, final boolean isValid,
|
||||||
final int timestamp, final DistracterFilter distracterFilter) {
|
final int timestamp, final DistracterFilter distracterFilter) {
|
||||||
final CharSequence prevWord = prevWordsInfo.mPrevWordsInfo[0].mWord;
|
if (word.length() > Constants.DICTIONARY_MAX_WORD_LENGTH) {
|
||||||
if (word.length() > Constants.DICTIONARY_MAX_WORD_LENGTH ||
|
|
||||||
(prevWord != null && prevWord.length() > Constants.DICTIONARY_MAX_WORD_LENGTH)) {
|
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
final int frequency = isValid ?
|
final int frequency = isValid ?
|
||||||
|
@ -71,17 +70,29 @@ public class UserHistoryDictionary extends DecayingExpandableBinaryDictionaryBas
|
||||||
userHistoryDictionary.addUnigramEntryWithCheckingDistracter(word, frequency,
|
userHistoryDictionary.addUnigramEntryWithCheckingDistracter(word, frequency,
|
||||||
null /* shortcutTarget */, 0 /* shortcutFreq */, false /* isNotAWord */,
|
null /* shortcutTarget */, 0 /* shortcutFreq */, false /* isNotAWord */,
|
||||||
false /* isBlacklisted */, timestamp, distracterFilter);
|
false /* isBlacklisted */, timestamp, distracterFilter);
|
||||||
// Do not insert a word as a bigram of itself
|
|
||||||
if (TextUtils.equals(word, prevWord)) {
|
final boolean isBeginningOfSentenceContext =
|
||||||
return;
|
prevWordsInfo.mPrevWordsInfo[0].mIsBeginningOfSentence;
|
||||||
}
|
final PrevWordsInfo prevWordsInfoToBeSaved =
|
||||||
if (null != prevWord) {
|
prevWordsInfo.getTrimmedPrevWordsInfo(SUPPORTED_NGRAM - 1);
|
||||||
if (prevWordsInfo.mPrevWordsInfo[0].mIsBeginningOfSentence) {
|
for (int i = 0; i < prevWordsInfoToBeSaved.getPrevWordCount(); i++) {
|
||||||
// Beginning-of-Sentence n-gram entry is treated as a n-gram entry of invalid word.
|
final CharSequence prevWord = prevWordsInfoToBeSaved.mPrevWordsInfo[i].mWord;
|
||||||
userHistoryDictionary.addNgramEntry(prevWordsInfo, word,
|
if (prevWord == null || (prevWord.length() > Constants.DICTIONARY_MAX_WORD_LENGTH)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Do not insert a word as a bigram of itself
|
||||||
|
if (i == 0 && TextUtils.equals(word, prevWord)) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (isBeginningOfSentenceContext) {
|
||||||
|
// Beginning-of-Sentence n-gram entry is added as an n-gram entry of an OOV word.
|
||||||
|
userHistoryDictionary.addNgramEntry(
|
||||||
|
prevWordsInfoToBeSaved.getTrimmedPrevWordsInfo(i + 1), word,
|
||||||
FREQUENCY_FOR_WORDS_NOT_IN_DICTS, timestamp);
|
FREQUENCY_FOR_WORDS_NOT_IN_DICTS, timestamp);
|
||||||
} else {
|
} else {
|
||||||
userHistoryDictionary.addNgramEntry(prevWordsInfo, word, frequency, timestamp);
|
userHistoryDictionary.addNgramEntry(
|
||||||
|
prevWordsInfoToBeSaved.getTrimmedPrevWordsInfo(i + 1), word, frequency,
|
||||||
|
timestamp);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -166,6 +166,9 @@ void Ver4PatriciaTriePolicy::iterateNgramEntries(const WordIdArrayView prevWordI
|
||||||
for (const auto entry : languageModelDictContent->getProbabilityEntries(
|
for (const auto entry : languageModelDictContent->getProbabilityEntries(
|
||||||
prevWordIds.limit(i))) {
|
prevWordIds.limit(i))) {
|
||||||
const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry();
|
const ProbabilityEntry &probabilityEntry = entry.getProbabilityEntry();
|
||||||
|
if (!probabilityEntry.isValid()) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
const int probability = probabilityEntry.hasHistoricalInfo() ?
|
const int probability = probabilityEntry.hasHistoricalInfo() ?
|
||||||
ForgettingCurveUtils::decodeProbability(
|
ForgettingCurveUtils::decodeProbability(
|
||||||
probabilityEntry.getHistoricalInfo(), mHeaderPolicy)
|
probabilityEntry.getHistoricalInfo(), mHeaderPolicy)
|
||||||
|
|
Loading…
Reference in a new issue