Merge "Use BinaryDictionary.MAX_PREV_WORD_COUNT_FOR_N_GRAM for D2 dictionaries."
This commit is contained in:
commit
775d3a8d53
5 changed files with 21 additions and 6 deletions
|
@ -58,7 +58,8 @@ public final class BinaryDictionary extends Dictionary {
|
|||
// Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h
|
||||
private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;
|
||||
|
||||
static final int DICTIONARY_MAX_WORD_LENGTH = 48;
|
||||
public static final int DICTIONARY_MAX_WORD_LENGTH = 48;
|
||||
public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3;
|
||||
|
||||
@UsedForTesting
|
||||
public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
||||
|
|
|
@ -125,7 +125,8 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary
|
|||
*/
|
||||
private void addNameLocked(final String name) {
|
||||
int len = StringUtils.codePointCount(name);
|
||||
NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
|
||||
NgramContext ngramContext = NgramContext.getEmptyPrevWordsContext(
|
||||
BinaryDictionary.MAX_PREV_WORD_COUNT_FOR_N_GRAM);
|
||||
// TODO: Better tokenization for non-Latin writing systems
|
||||
for (int i = 0; i < len; i++) {
|
||||
if (Character.isLetter(name.codePointAt(i))) {
|
||||
|
|
|
@ -43,6 +43,10 @@ public class NgramContext {
|
|||
|
||||
public static final String CONTEXT_SEPARATOR = " ";
|
||||
|
||||
public static NgramContext getEmptyPrevWordsContext(int maxPrevWordCount) {
|
||||
return new NgramContext(maxPrevWordCount, WordInfo.EMPTY_WORD_INFO);
|
||||
}
|
||||
|
||||
/**
|
||||
* Word information used to represent previous words information.
|
||||
*/
|
||||
|
@ -102,10 +106,17 @@ public class NgramContext {
|
|||
private final WordInfo[] mPrevWordsInfo;
|
||||
private final int mPrevWordsCount;
|
||||
|
||||
private final int mMaxPrevWordCount;
|
||||
|
||||
// Construct from the previous word information.
|
||||
public NgramContext(final WordInfo... prevWordsInfo) {
|
||||
this(DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, prevWordsInfo);
|
||||
}
|
||||
|
||||
public NgramContext(final int maxPrevWordCount, final WordInfo... prevWordsInfo) {
|
||||
mPrevWordsInfo = prevWordsInfo;
|
||||
mPrevWordsCount = prevWordsInfo.length;
|
||||
mMaxPrevWordCount = maxPrevWordCount;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -113,8 +124,7 @@ public class NgramContext {
|
|||
*/
|
||||
@Nonnull
|
||||
public NgramContext getNextNgramContext(final WordInfo wordInfo) {
|
||||
final int nextPrevWordCount = Math.min(
|
||||
DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, mPrevWordsCount + 1);
|
||||
final int nextPrevWordCount = Math.min(mMaxPrevWordCount, mPrevWordsCount + 1);
|
||||
final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
|
||||
prevWordsInfo[0] = wordInfo;
|
||||
System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
|
||||
|
|
|
@ -20,6 +20,7 @@ import android.content.Context;
|
|||
|
||||
import com.android.inputmethod.annotations.ExternallyReferenced;
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.latin.BinaryDictionary;
|
||||
import com.android.inputmethod.latin.Dictionary;
|
||||
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
|
||||
import com.android.inputmethod.latin.NgramContext;
|
||||
|
@ -98,7 +99,7 @@ public class UserHistoryDictionary extends ExpandableBinaryDictionary {
|
|||
public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary,
|
||||
@Nonnull final NgramContext ngramContext, final String word, final boolean isValid,
|
||||
final int timestamp) {
|
||||
if (word.length() > DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) {
|
||||
if (word.length() > BinaryDictionary.DICTIONARY_MAX_WORD_LENGTH) {
|
||||
return;
|
||||
}
|
||||
userHistoryDictionary.updateEntriesForWord(ngramContext, word,
|
||||
|
|
|
@ -18,6 +18,7 @@ package com.android.inputmethod.latin.personalization;
|
|||
|
||||
import android.content.Context;
|
||||
|
||||
import com.android.inputmethod.latin.BinaryDictionary;
|
||||
import com.android.inputmethod.latin.NgramContext;
|
||||
import com.android.inputmethod.latin.NgramContext.WordInfo;
|
||||
import com.android.inputmethod.latin.common.FileUtils;
|
||||
|
@ -98,7 +99,8 @@ public class UserHistoryDictionaryTestsHelper {
|
|||
|
||||
private static void addWordsToDictionary(final UserHistoryDictionary dict,
|
||||
final List<String> words, final int timestamp) {
|
||||
NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
|
||||
NgramContext ngramContext = NgramContext.getEmptyPrevWordsContext(
|
||||
BinaryDictionary.MAX_PREV_WORD_COUNT_FOR_N_GRAM);
|
||||
for (final String word : words) {
|
||||
UserHistoryDictionary.addToDictionary(dict, ngramContext, word, true, timestamp);
|
||||
ngramContext = ngramContext.getNextNgramContext(new WordInfo(word));
|
||||
|
|
Loading…
Reference in a new issue