am 775d3a8d: Merge "Use BinaryDictionary.MAX_PREV_WORD_COUNT_FOR_N_GRAM for D2 dictionaries."
* commit '775d3a8d53067149bbf78d03e8ae132fea8387c0': Use BinaryDictionary.MAX_PREV_WORD_COUNT_FOR_N_GRAM for D2 dictionaries.
main
commit
3136a7c65f
|
@ -58,7 +58,8 @@ public final class BinaryDictionary extends Dictionary {
|
||||||
// Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h
|
// Must be equal to CONFIDENCE_TO_AUTO_COMMIT in native/jni/src/defines.h
|
||||||
private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;
|
private static final int CONFIDENCE_TO_AUTO_COMMIT = 1000000;
|
||||||
|
|
||||||
static final int DICTIONARY_MAX_WORD_LENGTH = 48;
|
public static final int DICTIONARY_MAX_WORD_LENGTH = 48;
|
||||||
|
public static final int MAX_PREV_WORD_COUNT_FOR_N_GRAM = 3;
|
||||||
|
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
public static final String UNIGRAM_COUNT_QUERY = "UNIGRAM_COUNT";
|
||||||
|
|
|
@ -125,7 +125,8 @@ public class ContactsBinaryDictionary extends ExpandableBinaryDictionary
|
||||||
*/
|
*/
|
||||||
private void addNameLocked(final String name) {
|
private void addNameLocked(final String name) {
|
||||||
int len = StringUtils.codePointCount(name);
|
int len = StringUtils.codePointCount(name);
|
||||||
NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
|
NgramContext ngramContext = NgramContext.getEmptyPrevWordsContext(
|
||||||
|
BinaryDictionary.MAX_PREV_WORD_COUNT_FOR_N_GRAM);
|
||||||
// TODO: Better tokenization for non-Latin writing systems
|
// TODO: Better tokenization for non-Latin writing systems
|
||||||
for (int i = 0; i < len; i++) {
|
for (int i = 0; i < len; i++) {
|
||||||
if (Character.isLetter(name.codePointAt(i))) {
|
if (Character.isLetter(name.codePointAt(i))) {
|
||||||
|
|
|
@ -43,6 +43,10 @@ public class NgramContext {
|
||||||
|
|
||||||
public static final String CONTEXT_SEPARATOR = " ";
|
public static final String CONTEXT_SEPARATOR = " ";
|
||||||
|
|
||||||
|
public static NgramContext getEmptyPrevWordsContext(int maxPrevWordCount) {
|
||||||
|
return new NgramContext(maxPrevWordCount, WordInfo.EMPTY_WORD_INFO);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Word information used to represent previous words information.
|
* Word information used to represent previous words information.
|
||||||
*/
|
*/
|
||||||
|
@ -102,10 +106,17 @@ public class NgramContext {
|
||||||
private final WordInfo[] mPrevWordsInfo;
|
private final WordInfo[] mPrevWordsInfo;
|
||||||
private final int mPrevWordsCount;
|
private final int mPrevWordsCount;
|
||||||
|
|
||||||
|
private final int mMaxPrevWordCount;
|
||||||
|
|
||||||
// Construct from the previous word information.
|
// Construct from the previous word information.
|
||||||
public NgramContext(final WordInfo... prevWordsInfo) {
|
public NgramContext(final WordInfo... prevWordsInfo) {
|
||||||
|
this(DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, prevWordsInfo);
|
||||||
|
}
|
||||||
|
|
||||||
|
public NgramContext(final int maxPrevWordCount, final WordInfo... prevWordsInfo) {
|
||||||
mPrevWordsInfo = prevWordsInfo;
|
mPrevWordsInfo = prevWordsInfo;
|
||||||
mPrevWordsCount = prevWordsInfo.length;
|
mPrevWordsCount = prevWordsInfo.length;
|
||||||
|
mMaxPrevWordCount = maxPrevWordCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -113,8 +124,7 @@ public class NgramContext {
|
||||||
*/
|
*/
|
||||||
@Nonnull
|
@Nonnull
|
||||||
public NgramContext getNextNgramContext(final WordInfo wordInfo) {
|
public NgramContext getNextNgramContext(final WordInfo wordInfo) {
|
||||||
final int nextPrevWordCount = Math.min(
|
final int nextPrevWordCount = Math.min(mMaxPrevWordCount, mPrevWordsCount + 1);
|
||||||
DecoderSpecificConstants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, mPrevWordsCount + 1);
|
|
||||||
final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
|
final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
|
||||||
prevWordsInfo[0] = wordInfo;
|
prevWordsInfo[0] = wordInfo;
|
||||||
System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
|
System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
|
||||||
|
|
|
@ -20,6 +20,7 @@ import android.content.Context;
|
||||||
|
|
||||||
import com.android.inputmethod.annotations.ExternallyReferenced;
|
import com.android.inputmethod.annotations.ExternallyReferenced;
|
||||||
import com.android.inputmethod.annotations.UsedForTesting;
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
|
import com.android.inputmethod.latin.BinaryDictionary;
|
||||||
import com.android.inputmethod.latin.Dictionary;
|
import com.android.inputmethod.latin.Dictionary;
|
||||||
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
|
import com.android.inputmethod.latin.ExpandableBinaryDictionary;
|
||||||
import com.android.inputmethod.latin.NgramContext;
|
import com.android.inputmethod.latin.NgramContext;
|
||||||
|
@ -98,7 +99,7 @@ public class UserHistoryDictionary extends ExpandableBinaryDictionary {
|
||||||
public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary,
|
public static void addToDictionary(final ExpandableBinaryDictionary userHistoryDictionary,
|
||||||
@Nonnull final NgramContext ngramContext, final String word, final boolean isValid,
|
@Nonnull final NgramContext ngramContext, final String word, final boolean isValid,
|
||||||
final int timestamp) {
|
final int timestamp) {
|
||||||
if (word.length() > DecoderSpecificConstants.DICTIONARY_MAX_WORD_LENGTH) {
|
if (word.length() > BinaryDictionary.DICTIONARY_MAX_WORD_LENGTH) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
userHistoryDictionary.updateEntriesForWord(ngramContext, word,
|
userHistoryDictionary.updateEntriesForWord(ngramContext, word,
|
||||||
|
|
|
@ -18,6 +18,7 @@ package com.android.inputmethod.latin.personalization;
|
||||||
|
|
||||||
import android.content.Context;
|
import android.content.Context;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.BinaryDictionary;
|
||||||
import com.android.inputmethod.latin.NgramContext;
|
import com.android.inputmethod.latin.NgramContext;
|
||||||
import com.android.inputmethod.latin.NgramContext.WordInfo;
|
import com.android.inputmethod.latin.NgramContext.WordInfo;
|
||||||
import com.android.inputmethod.latin.common.FileUtils;
|
import com.android.inputmethod.latin.common.FileUtils;
|
||||||
|
@ -98,7 +99,8 @@ public class UserHistoryDictionaryTestsHelper {
|
||||||
|
|
||||||
private static void addWordsToDictionary(final UserHistoryDictionary dict,
|
private static void addWordsToDictionary(final UserHistoryDictionary dict,
|
||||||
final List<String> words, final int timestamp) {
|
final List<String> words, final int timestamp) {
|
||||||
NgramContext ngramContext = NgramContext.EMPTY_PREV_WORDS_INFO;
|
NgramContext ngramContext = NgramContext.getEmptyPrevWordsContext(
|
||||||
|
BinaryDictionary.MAX_PREV_WORD_COUNT_FOR_N_GRAM);
|
||||||
for (final String word : words) {
|
for (final String word : words) {
|
||||||
UserHistoryDictionary.addToDictionary(dict, ngramContext, word, true, timestamp);
|
UserHistoryDictionary.addToDictionary(dict, ngramContext, word, true, timestamp);
|
||||||
ngramContext = ngramContext.getNextNgramContext(new WordInfo(word));
|
ngramContext = ngramContext.getNextNgramContext(new WordInfo(word));
|
||||||
|
|
Loading…
Reference in New Issue