2014-05-19 04:55:40 +00:00
|
|
|
/*
|
|
|
|
* Copyright (C) 2014 The Android Open Source Project
|
|
|
|
*
|
|
|
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
|
|
* you may not use this file except in compliance with the License.
|
|
|
|
* You may obtain a copy of the License at
|
|
|
|
*
|
|
|
|
* http://www.apache.org/licenses/LICENSE-2.0
|
|
|
|
*
|
|
|
|
* Unless required by applicable law or agreed to in writing, software
|
|
|
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
|
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
|
|
* See the License for the specific language governing permissions and
|
|
|
|
* limitations under the License.
|
|
|
|
*/
|
|
|
|
|
|
|
|
package com.android.inputmethod.latin;
|
|
|
|
|
2014-07-19 15:27:27 +00:00
|
|
|
import android.text.TextUtils;
|
2014-06-25 05:14:37 +00:00
|
|
|
|
2014-09-26 12:46:57 +00:00
|
|
|
import com.android.inputmethod.annotations.UsedForTesting;
|
2014-10-23 09:37:32 +00:00
|
|
|
import com.android.inputmethod.latin.common.Constants;
|
2014-10-28 12:31:09 +00:00
|
|
|
import com.android.inputmethod.latin.common.StringUtils;
|
2014-06-25 05:14:37 +00:00
|
|
|
|
2015-02-04 20:50:31 +00:00
|
|
|
import java.util.ArrayList;
|
2014-07-19 15:27:27 +00:00
|
|
|
import java.util.Arrays;
|
|
|
|
|
2014-10-22 05:04:07 +00:00
|
|
|
import javax.annotation.Nonnull;
|
|
|
|
|
2014-05-23 14:19:33 +00:00
|
|
|
/**
|
|
|
|
* Class to represent information of previous words. This class is used to add n-gram entries
|
|
|
|
* into binary dictionaries, to get predictions, and to get suggestions.
|
|
|
|
*/
|
2014-09-29 01:52:18 +00:00
|
|
|
public class NgramContext {
|
2014-10-22 05:04:07 +00:00
|
|
|
@Nonnull
|
2014-09-29 01:52:18 +00:00
|
|
|
public static final NgramContext EMPTY_PREV_WORDS_INFO =
|
|
|
|
new NgramContext(WordInfo.EMPTY_WORD_INFO);
|
2014-10-22 05:04:07 +00:00
|
|
|
@Nonnull
|
2014-09-29 01:52:18 +00:00
|
|
|
public static final NgramContext BEGINNING_OF_SENTENCE =
|
2014-10-20 05:48:56 +00:00
|
|
|
new NgramContext(WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO);
|
2014-05-23 10:58:58 +00:00
|
|
|
|
2015-02-04 20:50:31 +00:00
|
|
|
public static final String BEGINNING_OF_SENTENCE_TAG = "<S>";
|
|
|
|
|
|
|
|
public static final String CONTEXT_SEPARATOR = " ";
|
|
|
|
|
2014-06-25 05:14:37 +00:00
|
|
|
/**
|
|
|
|
* Word information used to represent previous words information.
|
|
|
|
*/
|
|
|
|
public static class WordInfo {
|
2014-10-22 05:04:07 +00:00
|
|
|
@Nonnull
|
2014-06-25 05:14:37 +00:00
|
|
|
public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null);
|
2014-10-22 05:04:07 +00:00
|
|
|
@Nonnull
|
2014-10-20 05:48:56 +00:00
|
|
|
public static final WordInfo BEGINNING_OF_SENTENCE_WORD_INFO = new WordInfo();
|
2014-06-25 05:14:37 +00:00
|
|
|
|
2014-07-19 15:27:27 +00:00
|
|
|
// This is an empty char sequence when mIsBeginningOfSentence is true.
|
|
|
|
public final CharSequence mWord;
|
2014-06-25 05:14:37 +00:00
|
|
|
// TODO: Have sentence separator.
|
|
|
|
// Whether the current context is beginning of sentence or not. This is true when composing
|
|
|
|
// at the beginning of an input field or composing a word after a sentence separator.
|
|
|
|
public final boolean mIsBeginningOfSentence;
|
|
|
|
|
|
|
|
// Beginning of sentence.
|
2014-10-20 05:48:56 +00:00
|
|
|
private WordInfo() {
|
2014-06-25 05:14:37 +00:00
|
|
|
mWord = "";
|
|
|
|
mIsBeginningOfSentence = true;
|
|
|
|
}
|
|
|
|
|
2014-07-19 15:27:27 +00:00
|
|
|
public WordInfo(final CharSequence word) {
|
2014-06-25 05:14:37 +00:00
|
|
|
mWord = word;
|
|
|
|
mIsBeginningOfSentence = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
public boolean isValid() {
|
|
|
|
return mWord != null;
|
|
|
|
}
|
2014-06-27 08:59:21 +00:00
|
|
|
|
|
|
|
@Override
|
|
|
|
public int hashCode() {
|
|
|
|
return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } );
|
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public boolean equals(Object o) {
|
|
|
|
if (this == o) return true;
|
|
|
|
if (!(o instanceof WordInfo)) return false;
|
|
|
|
final WordInfo wordInfo = (WordInfo)o;
|
|
|
|
if (mWord == null || wordInfo.mWord == null) {
|
|
|
|
return mWord == wordInfo.mWord
|
|
|
|
&& mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
|
|
|
|
}
|
2014-07-19 15:27:27 +00:00
|
|
|
return TextUtils.equals(mWord, wordInfo.mWord)
|
2014-06-27 08:59:21 +00:00
|
|
|
&& mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence;
|
|
|
|
}
|
2014-06-25 05:14:37 +00:00
|
|
|
}
|
2014-05-19 04:55:40 +00:00
|
|
|
|
2014-06-25 05:14:37 +00:00
|
|
|
// The words immediately before the considered word. EMPTY_WORD_INFO element means we don't
|
|
|
|
// have any context for that previous word including the "beginning of sentence context" - we
|
|
|
|
// just don't know what to predict using the information. An example of that is after a comma.
|
|
|
|
// For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the
|
|
|
|
// WordComposer was reset and before starting a new composing word, but we should never be
|
|
|
|
// calling getSuggetions* in this situation.
|
2014-09-26 12:46:57 +00:00
|
|
|
private final WordInfo[] mPrevWordsInfo;
|
|
|
|
private final int mPrevWordsCount;
|
2014-05-21 06:40:08 +00:00
|
|
|
|
2014-06-25 05:14:37 +00:00
|
|
|
// Construct from the previous word information.
|
2014-09-29 01:52:18 +00:00
|
|
|
public NgramContext(final WordInfo... prevWordsInfo) {
|
2014-09-26 12:46:57 +00:00
|
|
|
mPrevWordsInfo = prevWordsInfo;
|
|
|
|
mPrevWordsCount = prevWordsInfo.length;
|
2014-06-25 05:14:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// Create next prevWordsInfo using current prevWordsInfo.
|
2014-10-22 05:04:07 +00:00
|
|
|
@Nonnull
|
2014-09-29 01:52:18 +00:00
|
|
|
public NgramContext getNextNgramContext(final WordInfo wordInfo) {
|
2014-09-17 13:04:25 +00:00
|
|
|
final int nextPrevWordCount = Math.min(Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM,
|
2014-09-26 12:46:57 +00:00
|
|
|
mPrevWordsCount + 1);
|
2014-09-17 13:04:25 +00:00
|
|
|
final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount];
|
2014-06-25 05:14:37 +00:00
|
|
|
prevWordsInfo[0] = wordInfo;
|
2014-09-26 12:46:57 +00:00
|
|
|
System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1);
|
2014-09-29 01:52:18 +00:00
|
|
|
return new NgramContext(prevWordsInfo);
|
2014-05-19 04:55:40 +00:00
|
|
|
}
|
2014-05-23 10:58:58 +00:00
|
|
|
|
2015-02-04 20:50:31 +00:00
|
|
|
|
|
|
|
/**
|
|
|
|
* Extracts the previous words context.
|
|
|
|
*
|
|
|
|
* @return a String with the previous words separated by white space.
|
|
|
|
*/
|
|
|
|
public String extractPrevWordsContext() {
|
|
|
|
final ArrayList<String> terms = new ArrayList<>();
|
|
|
|
for (int i = mPrevWordsInfo.length - 1; i >= 0; --i) {
|
|
|
|
if (mPrevWordsInfo[i] != null && mPrevWordsInfo[i].isValid()) {
|
|
|
|
final NgramContext.WordInfo wordInfo = mPrevWordsInfo[i];
|
|
|
|
if (wordInfo.mIsBeginningOfSentence) {
|
|
|
|
terms.add(BEGINNING_OF_SENTENCE_TAG);
|
|
|
|
} else {
|
|
|
|
final String term = wordInfo.mWord.toString();
|
|
|
|
if (!term.isEmpty()) {
|
|
|
|
terms.add(term);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return terms.size() == 0 ? BEGINNING_OF_SENTENCE_TAG
|
|
|
|
: TextUtils.join(CONTEXT_SEPARATOR, terms);
|
|
|
|
}
|
|
|
|
|
2014-05-23 10:58:58 +00:00
|
|
|
public boolean isValid() {
|
2014-09-26 12:46:57 +00:00
|
|
|
return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid();
|
|
|
|
}
|
|
|
|
|
|
|
|
public boolean isBeginningOfSentenceContext() {
|
|
|
|
return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence;
|
|
|
|
}
|
|
|
|
|
|
|
|
// n is 1-indexed.
|
|
|
|
// TODO: Remove
|
|
|
|
public CharSequence getNthPrevWord(final int n) {
|
|
|
|
if (n <= 0 || n > mPrevWordsCount) {
|
|
|
|
return null;
|
|
|
|
}
|
|
|
|
return mPrevWordsInfo[n - 1].mWord;
|
|
|
|
}
|
|
|
|
|
|
|
|
// n is 1-indexed.
|
|
|
|
@UsedForTesting
|
2014-12-02 03:53:56 +00:00
|
|
|
public boolean isNthPrevWordBeginningOfSentence(final int n) {
|
2014-09-26 12:46:57 +00:00
|
|
|
if (n <= 0 || n > mPrevWordsCount) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return mPrevWordsInfo[n - 1].mIsBeginningOfSentence;
|
2014-06-25 05:14:37 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
public void outputToArray(final int[][] codePointArrays,
|
|
|
|
final boolean[] isBeginningOfSentenceArray) {
|
2014-09-26 12:46:57 +00:00
|
|
|
for (int i = 0; i < mPrevWordsCount; i++) {
|
2014-06-25 05:14:37 +00:00
|
|
|
final WordInfo wordInfo = mPrevWordsInfo[i];
|
|
|
|
if (wordInfo == null || !wordInfo.isValid()) {
|
|
|
|
codePointArrays[i] = new int[0];
|
|
|
|
isBeginningOfSentenceArray[i] = false;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord);
|
|
|
|
isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence;
|
|
|
|
}
|
2014-05-23 10:58:58 +00:00
|
|
|
}
|
2014-05-27 08:28:29 +00:00
|
|
|
|
2014-09-17 13:04:25 +00:00
|
|
|
public int getPrevWordCount() {
|
2014-09-26 12:46:57 +00:00
|
|
|
return mPrevWordsCount;
|
2014-09-17 13:04:25 +00:00
|
|
|
}
|
|
|
|
|
2014-06-27 08:59:21 +00:00
|
|
|
@Override
|
|
|
|
public int hashCode() {
|
2014-10-01 02:21:08 +00:00
|
|
|
int hashValue = 0;
|
|
|
|
for (final WordInfo wordInfo : mPrevWordsInfo) {
|
|
|
|
if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) {
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
hashValue ^= wordInfo.hashCode();
|
|
|
|
}
|
|
|
|
return hashValue;
|
2014-06-27 08:59:21 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public boolean equals(Object o) {
|
|
|
|
if (this == o) return true;
|
2014-09-29 01:52:18 +00:00
|
|
|
if (!(o instanceof NgramContext)) return false;
|
|
|
|
final NgramContext prevWordsInfo = (NgramContext)o;
|
2014-09-19 10:02:08 +00:00
|
|
|
|
2014-09-26 12:46:57 +00:00
|
|
|
final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount);
|
2014-09-19 10:02:08 +00:00
|
|
|
for (int i = 0; i < minLength; i++) {
|
|
|
|
if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
2014-09-26 12:46:57 +00:00
|
|
|
final WordInfo[] longerWordsInfo;
|
|
|
|
final int longerWordsInfoCount;
|
|
|
|
if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) {
|
|
|
|
longerWordsInfo = mPrevWordsInfo;
|
|
|
|
longerWordsInfoCount = mPrevWordsCount;
|
|
|
|
} else {
|
|
|
|
longerWordsInfo = prevWordsInfo.mPrevWordsInfo;
|
|
|
|
longerWordsInfoCount = prevWordsInfo.mPrevWordsCount;
|
|
|
|
}
|
|
|
|
for (int i = minLength; i < longerWordsInfoCount; i++) {
|
2014-09-19 10:02:08 +00:00
|
|
|
if (longerWordsInfo[i] != null
|
|
|
|
&& !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
2014-06-27 08:59:21 +00:00
|
|
|
}
|
|
|
|
|
2014-05-27 08:28:29 +00:00
|
|
|
@Override
|
|
|
|
public String toString() {
|
2014-06-25 05:14:37 +00:00
|
|
|
final StringBuffer builder = new StringBuffer();
|
2014-09-26 12:46:57 +00:00
|
|
|
for (int i = 0; i < mPrevWordsCount; i++) {
|
2014-06-25 05:14:37 +00:00
|
|
|
final WordInfo wordInfo = mPrevWordsInfo[i];
|
|
|
|
builder.append("PrevWord[");
|
|
|
|
builder.append(i);
|
|
|
|
builder.append("]: ");
|
2014-09-19 10:02:08 +00:00
|
|
|
if (wordInfo == null) {
|
|
|
|
builder.append("null. ");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
if (!wordInfo.isValid()) {
|
2014-06-25 05:14:37 +00:00
|
|
|
builder.append("Empty. ");
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
builder.append(wordInfo.mWord);
|
|
|
|
builder.append(", isBeginningOfSentence: ");
|
|
|
|
builder.append(wordInfo.mIsBeginningOfSentence);
|
|
|
|
builder.append(". ");
|
|
|
|
}
|
|
|
|
return builder.toString();
|
2014-05-27 08:28:29 +00:00
|
|
|
}
|
2014-05-19 04:55:40 +00:00
|
|
|
}
|