/* * Copyright (C) 2014 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package com.android.inputmethod.latin; import android.text.TextUtils; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.utils.StringUtils; import java.util.Arrays; /** * Class to represent information of previous words. This class is used to add n-gram entries * into binary dictionaries, to get predictions, and to get suggestions. */ public class NgramContext { public static final NgramContext EMPTY_PREV_WORDS_INFO = new NgramContext(WordInfo.EMPTY_WORD_INFO); public static final NgramContext BEGINNING_OF_SENTENCE = new NgramContext(WordInfo.BEGINNING_OF_SENTENCE); /** * Word information used to represent previous words information. */ public static class WordInfo { public static final WordInfo EMPTY_WORD_INFO = new WordInfo(null); public static final WordInfo BEGINNING_OF_SENTENCE = new WordInfo(); // This is an empty char sequence when mIsBeginningOfSentence is true. public final CharSequence mWord; // TODO: Have sentence separator. // Whether the current context is beginning of sentence or not. This is true when composing // at the beginning of an input field or composing a word after a sentence separator. public final boolean mIsBeginningOfSentence; // Beginning of sentence. public WordInfo() { mWord = ""; mIsBeginningOfSentence = true; } public WordInfo(final CharSequence word) { mWord = word; mIsBeginningOfSentence = false; } public boolean isValid() { return mWord != null; } @Override public int hashCode() { return Arrays.hashCode(new Object[] { mWord, mIsBeginningOfSentence } ); } @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof WordInfo)) return false; final WordInfo wordInfo = (WordInfo)o; if (mWord == null || wordInfo.mWord == null) { return mWord == wordInfo.mWord && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence; } return TextUtils.equals(mWord, wordInfo.mWord) && mIsBeginningOfSentence == wordInfo.mIsBeginningOfSentence; } } // The words immediately before the considered word. EMPTY_WORD_INFO element means we don't // have any context for that previous word including the "beginning of sentence context" - we // just don't know what to predict using the information. An example of that is after a comma. // For simplicity of implementation, elements may also be EMPTY_WORD_INFO transiently after the // WordComposer was reset and before starting a new composing word, but we should never be // calling getSuggetions* in this situation. private final WordInfo[] mPrevWordsInfo; private final int mPrevWordsCount; // Construct from the previous word information. public NgramContext(final WordInfo... prevWordsInfo) { mPrevWordsInfo = prevWordsInfo; mPrevWordsCount = prevWordsInfo.length; } // Construct from WordInfo array and size. The caller shouldn't change prevWordsInfo after // calling this method. private NgramContext(final NgramContext ngramContext, final int prevWordsCount) { if (ngramContext.mPrevWordsCount < prevWordsCount) { throw new IndexOutOfBoundsException("ngramContext.mPrevWordsCount (" + ngramContext.mPrevWordsCount + ") is smaller than prevWordsCount (" + prevWordsCount + ")"); } mPrevWordsInfo = ngramContext.mPrevWordsInfo; mPrevWordsCount = prevWordsCount; } // Create next prevWordsInfo using current prevWordsInfo. public NgramContext getNextNgramContext(final WordInfo wordInfo) { final int nextPrevWordCount = Math.min(Constants.MAX_PREV_WORD_COUNT_FOR_N_GRAM, mPrevWordsCount + 1); final WordInfo[] prevWordsInfo = new WordInfo[nextPrevWordCount]; prevWordsInfo[0] = wordInfo; System.arraycopy(mPrevWordsInfo, 0, prevWordsInfo, 1, nextPrevWordCount - 1); return new NgramContext(prevWordsInfo); } public boolean isValid() { return mPrevWordsCount > 0 && mPrevWordsInfo[0].isValid(); } public boolean isBeginningOfSentenceContext() { return mPrevWordsCount > 0 && mPrevWordsInfo[0].mIsBeginningOfSentence; } // n is 1-indexed. // TODO: Remove public CharSequence getNthPrevWord(final int n) { if (n <= 0 || n > mPrevWordsCount) { return null; } return mPrevWordsInfo[n - 1].mWord; } // n is 1-indexed. @UsedForTesting public boolean isNthPrevWordBeginningOfSontence(final int n) { if (n <= 0 || n > mPrevWordsCount) { return false; } return mPrevWordsInfo[n - 1].mIsBeginningOfSentence; } public void outputToArray(final int[][] codePointArrays, final boolean[] isBeginningOfSentenceArray) { for (int i = 0; i < mPrevWordsCount; i++) { final WordInfo wordInfo = mPrevWordsInfo[i]; if (wordInfo == null || !wordInfo.isValid()) { codePointArrays[i] = new int[0]; isBeginningOfSentenceArray[i] = false; continue; } codePointArrays[i] = StringUtils.toCodePointArray(wordInfo.mWord); isBeginningOfSentenceArray[i] = wordInfo.mIsBeginningOfSentence; } } public NgramContext getTrimmedNgramContext(final int maxPrevWordCount) { final int newSize = Math.min(maxPrevWordCount, mPrevWordsCount); return new NgramContext(this /* prevWordsInfo */, newSize); } public int getPrevWordCount() { return mPrevWordsCount; } @Override public int hashCode() { // Just for having equals(). return mPrevWordsInfo[0].hashCode(); } @Override public boolean equals(Object o) { if (this == o) return true; if (!(o instanceof NgramContext)) return false; final NgramContext prevWordsInfo = (NgramContext)o; final int minLength = Math.min(mPrevWordsCount, prevWordsInfo.mPrevWordsCount); for (int i = 0; i < minLength; i++) { if (!mPrevWordsInfo[i].equals(prevWordsInfo.mPrevWordsInfo[i])) { return false; } } final WordInfo[] longerWordsInfo; final int longerWordsInfoCount; if (mPrevWordsCount > prevWordsInfo.mPrevWordsCount) { longerWordsInfo = mPrevWordsInfo; longerWordsInfoCount = mPrevWordsCount; } else { longerWordsInfo = prevWordsInfo.mPrevWordsInfo; longerWordsInfoCount = prevWordsInfo.mPrevWordsCount; } for (int i = minLength; i < longerWordsInfoCount; i++) { if (longerWordsInfo[i] != null && !WordInfo.EMPTY_WORD_INFO.equals(longerWordsInfo[i])) { return false; } } return true; } @Override public String toString() { final StringBuffer builder = new StringBuffer(); for (int i = 0; i < mPrevWordsCount; i++) { final WordInfo wordInfo = mPrevWordsInfo[i]; builder.append("PrevWord["); builder.append(i); builder.append("]: "); if (wordInfo == null) { builder.append("null. "); continue; } if (!wordInfo.isValid()) { builder.append("Empty. "); continue; } builder.append(wordInfo.mWord); builder.append(", isBeginningOfSentence: "); builder.append(wordInfo.mIsBeginningOfSentence); builder.append(". "); } return builder.toString(); } }