Merge "Use BinaryDictionary.MAX_PREV_WORD_COUNT_FOR_N_GRAM for D2 dictionaries."

This commit is contained in:
Mohammadinamul Sheik 2015-03-17 19:10:14 +00:00 committed by Android (Google) Code Review
commit 775d3a8d53
5 changed files with 21 additions and 6 deletions

View file

@ -58,7 +58,8 @@ public final class BinaryDictionary extends Dictionary {
// Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h // Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h
private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000; private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;
static final int DICTIONARY_MAX_WORD_LENGTH = 48; public static final int DICTIONARY_MAX_WORD_LENGTH = 48;
public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3;
@UsedForTesting @UsedForTesting
public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT"; public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";

View file

@ -125,7 +125,8 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary
*/ */
private void addNameLocked(final String name) { private void addNameLocked(final String name) {
int len = StringUtils.codePointCount(name); int len = StringUtils.codePointCount(name);
NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; NgramContext ngramContext = NgramContext.getEmptyPrevWordsContext(
BinaryDictionary.MAX_PREV_WORD_COUNT_FOR_N_GRAM);
// TODO: Better tokenization for non-Latin writing systems // TODO: Better tokenization for non-Latin writing systems
for (int i = 0; i < len; i++) { for (int i = 0; i < len; i++) {
if (Character.isLetter(name.codePointAt(i))) { if (Character.isLetter(name.codePointAt(i))) {

View file

@ -43,6 +43,10 @@ public class NgramContext {
public static final String CONTEXT_SEPARATOR = " "; public static final String CONTEXT_SEPARATOR = " ";
public static NgramContext getEmptyPrevWordsContext(int maxPrevWordCount) {
return new NgramContext(maxPrevWordCount, WordInfo.EMPTY_WORD_INFO);
}
/** /**
* Word information used to represent previous words information. * Word information used to represent previous words information.
*/ */
@ -102,10 +106,17 @@ public class NgramContext {
private final WordInfo[] mPrevWordsInfo; private final WordInfo[] mPrevWordsInfo;
private final int mPrevWordsCount; private final int mPrevWordsCount;
private final int mMaxPrevWordCount;
// Construct from the previous word information. // Construct from the previous word information.
public NgramContext(final WordInfo... prevWordsInfo) { public NgramContext(final WordInfo... prevWordsInfo) {
this(DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, prevWordsInfo);
}
public NgramContext(final int maxPrevWordCount, final WordInfo... prevWordsInfo) {
mPrevWordsInfo = prevWordsInfo; mPrevWordsInfo = prevWordsInfo;
mPrevWordsCount = prevWordsInfo.length; mPrevWordsCount = prevWordsInfo.length;
mMaxPrevWordCount = maxPrevWordCount;
} }
/** /**
@ -113,8 +124,7 @@ public class NgramContext {
*/ */
@Nonnull @Nonnull
public NgramContext getNextNgramContext(final WordInfo wordInfo) { public NgramContext getNextNgramContext(final WordInfo wordInfo) {
final int nextPrevWordCount = Math.min( final int nextPrevWordCount = Math.min(mMaxPrevWordCount, mPrevWordsCount + 1);
DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, mPrevWordsCount + 1);
final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount]; final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
prevWordsInfo[0] = wordInfo; prevWordsInfo[0] = wordInfo;
System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1); System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);

View file

@ -20,6 +20,7 @@ import android.content.Context;
import com.android.inputmethod.annotations.ExternallyReferenced; import com.android.inputmethod.annotations.ExternallyReferenced;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.Dictionary; import com.android.inputmethod.latin.Dictionary;
import com.android.inputmethod.latin.ExpandableBinaryDictionary; import com.android.inputmethod.latin.ExpandableBinaryDictionary;
import com.android.inputmethod.latin.NgramContext; import com.android.inputmethod.latin.NgramContext;
@ -98,7 +99,7 @@ public class UserHistoryDictionary extends ExpandableBinaryDictionary {
public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary, public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary,
@Nonnull final NgramContext ngramContext, final String word, final boolean isValid, @Nonnull final NgramContext ngramContext, final String word, final boolean isValid,
final int timestamp) { final int timestamp) {
if (word.length() > DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) { if (word.length() > BinaryDictionary.DICTIONARY_MAX_WORD_LENGTH) {
return; return;
} }
userHistoryDictionary.updateEntriesForWord(ngramContext, word, userHistoryDictionary.updateEntriesForWord(ngramContext, word,

View file

@ -18,6 +18,7 @@ package com.android.inputmethod.latin.personalization;
import android.content.Context; import android.content.Context;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.NgramContext; import com.android.inputmethod.latin.NgramContext;
import com.android.inputmethod.latin.NgramContext.WordInfo; import com.android.inputmethod.latin.NgramContext.WordInfo;
import com.android.inputmethod.latin.common.FileUtils; import com.android.inputmethod.latin.common.FileUtils;
@ -98,7 +99,8 @@ public class UserHistoryDictionaryTestsHelper {
private static void addWordsToDictionary(final UserHistoryDictionary dict, private static void addWordsToDictionary(final UserHistoryDictionary dict,
final List<String> words, final int timestamp) { final List<String> words, final int timestamp) {
NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO; NgramContext ngramContext = NgramContext.getEmptyPrevWordsContext(
BinaryDictionary.MAX_PREV_WORD_COUNT_FOR_N_GRAM);
for (final String word : words) { for (final String word : words) {
UserHistoryDictionary.addToDictionary(dict, ngramContext, word, true, timestamp); UserHistoryDictionary.addToDictionary(dict, ngramContext, word, true, timestamp);
ngramContext = ngramContext.getNextNgramContext(new WordInfo(word)); ngramContext = ngramContext.getNextNgramContext(new WordInfo(word));