2014-02-06 06:13:33 +00:00
|
|
|
/*
 * Copyright (C) 2011 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
|
|
|
|
package com.android.inputmethod.latin.makedict;
|
|
|
|
|
|
|
|
import com.android.inputmethod.annotations.UsedForTesting;
|
|
|
|
import com.android.inputmethod.latin.BinaryDictionary;
|
2014-10-20 05:48:56 +00:00
|
|
|
import com.android.inputmethod.latin.Dictionary;
|
2014-10-01 02:21:08 +00:00
|
|
|
import com.android.inputmethod.latin.NgramContext;
|
|
|
|
import com.android.inputmethod.latin.NgramContext.WordInfo;
|
2014-10-28 12:31:09 +00:00
|
|
|
import com.android.inputmethod.latin.common.StringUtils;
|
2014-02-06 07:09:25 +00:00
|
|
|
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
|
2014-02-06 06:13:33 +00:00
|
|
|
|
|
|
|
import java.util.ArrayList;
|
|
|
|
import java.util.Arrays;
|
|
|
|
|
2014-10-09 06:26:10 +00:00
|
|
|
import javax.annotation.Nullable;
|
|
|
|
|
2014-02-06 06:13:33 +00:00
|
|
|
/**
|
|
|
|
* Utility class for a word with a probability.
|
|
|
|
*
|
|
|
|
* This is chiefly used to iterate a dictionary.
|
|
|
|
*/
|
|
|
|
public final class WordProperty implements Comparable<WordProperty> {
|
|
|
|
public final String mWord;
|
|
|
|
public final ProbabilityInfo mProbabilityInfo;
|
2014-10-01 02:21:08 +00:00
|
|
|
public final ArrayList<NgramProperty> mNgrams;
|
2014-05-23 10:58:58 +00:00
|
|
|
// TODO: Support mIsBeginningOfSentence.
|
|
|
|
public final boolean mIsBeginningOfSentence;
|
2014-02-06 06:13:33 +00:00
|
|
|
public final boolean mIsNotAWord;
|
2014-10-14 03:13:11 +00:00
|
|
|
public final boolean mIsPossiblyOffensive;
|
2014-10-01 02:21:08 +00:00
|
|
|
public final boolean mHasNgrams;
|
2014-02-06 06:13:33 +00:00
|
|
|
|
|
|
|
private int mHashCode = 0;
|
|
|
|
|
2014-10-01 02:21:08 +00:00
|
|
|
// TODO: Support n-gram.
|
2014-03-28 06:02:00 +00:00
|
|
|
@UsedForTesting
|
2014-02-10 06:05:08 +00:00
|
|
|
public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
|
2014-10-09 06:26:10 +00:00
|
|
|
@Nullable final ArrayList<WeightedString> bigrams,
|
2014-10-14 03:13:11 +00:00
|
|
|
final boolean isNotAWord, final boolean isPossiblyOffensive) {
|
2014-02-06 06:13:33 +00:00
|
|
|
mWord = word;
|
2014-02-10 06:05:08 +00:00
|
|
|
mProbabilityInfo = probabilityInfo;
|
2014-10-03 08:55:26 +00:00
|
|
|
if (null == bigrams) {
|
|
|
|
mNgrams = null;
|
|
|
|
} else {
|
|
|
|
mNgrams = new ArrayList<>();
|
|
|
|
final NgramContext ngramContext = new NgramContext(new WordInfo(mWord));
|
2014-10-20 05:48:56 +00:00
|
|
|
for (final WeightedString bigramTarget : bigrams) {
|
|
|
|
mNgrams.add(new NgramProperty(bigramTarget, ngramContext));
|
2014-10-01 02:21:08 +00:00
|
|
|
}
|
|
|
|
}
|
2014-05-23 10:58:58 +00:00
|
|
|
mIsBeginningOfSentence = false;
|
2014-02-06 06:13:33 +00:00
|
|
|
mIsNotAWord = isNotAWord;
|
2014-10-14 03:13:11 +00:00
|
|
|
mIsPossiblyOffensive = isPossiblyOffensive;
|
2014-10-01 02:21:08 +00:00
|
|
|
mHasNgrams = bigrams != null && !bigrams.isEmpty();
|
2014-02-06 06:13:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
|
|
|
|
return new ProbabilityInfo(
|
|
|
|
probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
|
|
|
|
probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
|
|
|
|
probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
|
|
|
|
probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
|
|
|
|
}
|
|
|
|
|
|
|
|
// Construct word property using information from native code.
|
|
|
|
// This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
|
|
|
|
public WordProperty(final int[] codePoints, final boolean isNotAWord,
|
2015-02-10 22:54:38 +00:00
|
|
|
final boolean isPossiblyOffensive, final boolean hasBigram,
|
2014-06-24 03:37:07 +00:00
|
|
|
final boolean isBeginningOfSentence, final int[] probabilityInfo,
|
2014-10-09 06:26:10 +00:00
|
|
|
final ArrayList<int[][]> ngramPrevWordsArray,
|
2014-10-22 09:15:53 +00:00
|
|
|
final ArrayList<boolean[]> ngramPrevWordIsBeginningOfSentenceArray,
|
2015-02-10 22:54:38 +00:00
|
|
|
final ArrayList<int[]> ngramTargets, final ArrayList<int[]> ngramProbabilityInfo) {
|
2014-02-06 06:13:33 +00:00
|
|
|
mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
|
|
|
|
mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
|
2014-10-03 08:55:26 +00:00
|
|
|
final ArrayList<NgramProperty> ngrams = new ArrayList<>();
|
2014-06-24 03:37:07 +00:00
|
|
|
mIsBeginningOfSentence = isBeginningOfSentence;
|
2014-02-06 06:13:33 +00:00
|
|
|
mIsNotAWord = isNotAWord;
|
2014-10-14 03:13:11 +00:00
|
|
|
mIsPossiblyOffensive = isPossiblyOffensive;
|
2014-10-01 02:21:08 +00:00
|
|
|
mHasNgrams = hasBigram;
|
|
|
|
|
2014-10-09 06:26:10 +00:00
|
|
|
final int relatedNgramCount = ngramTargets.size();
|
2014-10-01 02:21:08 +00:00
|
|
|
for (int i = 0; i < relatedNgramCount; i++) {
|
|
|
|
final String ngramTargetString =
|
2014-10-09 06:26:10 +00:00
|
|
|
StringUtils.getStringFromNullTerminatedCodePointArray(ngramTargets.get(i));
|
2014-10-01 02:21:08 +00:00
|
|
|
final WeightedString ngramTarget = new WeightedString(ngramTargetString,
|
2014-10-09 06:26:10 +00:00
|
|
|
createProbabilityInfoFromArray(ngramProbabilityInfo.get(i)));
|
2014-10-22 09:15:53 +00:00
|
|
|
final int[][] prevWords = ngramPrevWordsArray.get(i);
|
|
|
|
final boolean[] isBeginningOfSentenceArray =
|
|
|
|
ngramPrevWordIsBeginningOfSentenceArray.get(i);
|
|
|
|
final WordInfo[] wordInfoArray = new WordInfo[prevWords.length];
|
|
|
|
for (int j = 0; j < prevWords.length; j++) {
|
|
|
|
wordInfoArray[j] = isBeginningOfSentenceArray[j]
|
|
|
|
? WordInfo.BEGINNING_OF_SENTENCE_WORD_INFO
|
|
|
|
: new WordInfo(StringUtils.getStringFromNullTerminatedCodePointArray(
|
|
|
|
prevWords[j]));
|
|
|
|
}
|
|
|
|
final NgramContext ngramContext = new NgramContext(wordInfoArray);
|
2014-10-03 08:55:26 +00:00
|
|
|
ngrams.add(new NgramProperty(ngramTarget, ngramContext));
|
2014-02-06 06:13:33 +00:00
|
|
|
}
|
2014-10-03 08:55:26 +00:00
|
|
|
mNgrams = ngrams.isEmpty() ? null : ngrams;
|
2014-02-06 06:13:33 +00:00
|
|
|
}
|
|
|
|
|
2014-10-01 02:21:08 +00:00
|
|
|
// TODO: Remove
|
2014-10-22 09:15:53 +00:00
|
|
|
@UsedForTesting
|
2014-10-01 02:21:08 +00:00
|
|
|
public ArrayList<WeightedString> getBigrams() {
|
2014-10-03 08:55:26 +00:00
|
|
|
if (null == mNgrams) {
|
|
|
|
return null;
|
|
|
|
}
|
2014-10-01 02:21:08 +00:00
|
|
|
final ArrayList<WeightedString> bigrams = new ArrayList<>();
|
|
|
|
for (final NgramProperty ngram : mNgrams) {
|
|
|
|
if (ngram.mNgramContext.getPrevWordCount() == 1) {
|
|
|
|
bigrams.add(ngram.mTargetWord);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return bigrams;
|
|
|
|
}
|
|
|
|
|
2014-02-06 06:13:33 +00:00
|
|
|
public int getProbability() {
|
|
|
|
return mProbabilityInfo.mProbability;
|
|
|
|
}
|
|
|
|
|
|
|
|
private static int computeHashCode(WordProperty word) {
|
|
|
|
return Arrays.hashCode(new Object[] {
|
|
|
|
word.mWord,
|
|
|
|
word.mProbabilityInfo,
|
2014-10-01 02:21:08 +00:00
|
|
|
word.mNgrams,
|
2014-02-06 06:13:33 +00:00
|
|
|
word.mIsNotAWord,
|
2014-10-14 03:13:11 +00:00
|
|
|
word.mIsPossiblyOffensive
|
2014-02-06 06:13:33 +00:00
|
|
|
});
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Three-way comparison.
|
|
|
|
*
|
|
|
|
* A Word x is greater than a word y if x has a higher frequency. If they have the same
|
|
|
|
* frequency, they are sorted in lexicographic order.
|
|
|
|
*/
|
|
|
|
@Override
|
|
|
|
public int compareTo(final WordProperty w) {
|
|
|
|
if (getProbability() < w.getProbability()) return 1;
|
|
|
|
if (getProbability() > w.getProbability()) return -1;
|
|
|
|
return mWord.compareTo(w.mWord);
|
|
|
|
}
|
|
|
|
|
|
|
|
/**
|
|
|
|
* Equality test.
|
|
|
|
*
|
|
|
|
* Words are equal if they have the same frequency, the same spellings, and the same
|
|
|
|
* attributes.
|
|
|
|
*/
|
|
|
|
@Override
|
|
|
|
public boolean equals(Object o) {
|
|
|
|
if (o == this) return true;
|
|
|
|
if (!(o instanceof WordProperty)) return false;
|
|
|
|
WordProperty w = (WordProperty)o;
|
2015-02-10 22:54:38 +00:00
|
|
|
return mProbabilityInfo.equals(w.mProbabilityInfo)
|
|
|
|
&& mWord.equals(w.mWord) && equals(mNgrams, w.mNgrams)
|
2014-10-14 03:13:11 +00:00
|
|
|
&& mIsNotAWord == w.mIsNotAWord && mIsPossiblyOffensive == w.mIsPossiblyOffensive
|
2015-02-10 22:54:38 +00:00
|
|
|
&& mHasNgrams == w.mHasNgrams;
|
2014-02-06 06:13:33 +00:00
|
|
|
}
|
|
|
|
|
2014-10-09 06:26:10 +00:00
|
|
|
// TDOO: Have a utility method like java.util.Objects.equals.
|
|
|
|
private static <T> boolean equals(final ArrayList<T> a, final ArrayList<T> b) {
|
2014-10-03 08:55:26 +00:00
|
|
|
if (null == a) {
|
|
|
|
return null == b;
|
|
|
|
}
|
|
|
|
return a.equals(b);
|
|
|
|
}
|
|
|
|
|
2014-02-06 06:13:33 +00:00
|
|
|
@Override
|
|
|
|
public int hashCode() {
|
|
|
|
if (mHashCode == 0) {
|
|
|
|
mHashCode = computeHashCode(this);
|
|
|
|
}
|
|
|
|
return mHashCode;
|
|
|
|
}
|
|
|
|
|
|
|
|
@UsedForTesting
|
|
|
|
public boolean isValid() {
|
2014-10-20 05:48:56 +00:00
|
|
|
return getProbability() != Dictionary.NOT_A_PROBABILITY;
|
2014-02-06 06:13:33 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
@Override
|
|
|
|
public String toString() {
|
2014-02-06 07:09:25 +00:00
|
|
|
return CombinedFormatUtils.formatWordProperty(this);
|
2014-02-06 06:13:33 +00:00
|
|
|
}
|
|
|
|
}
|