Merge "Introduce NgramProperty in Java side."
This commit is contained in:
commit
108dad1491
12 changed files with 104 additions and 42 deletions
|
@ -169,8 +169,14 @@ public class NgramContext {
|
|||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
// Just for having equals().
|
||||
return mPrevWordsInfo[0].hashCode();
|
||||
int hashValue = 0;
|
||||
for (final WordInfo wordInfo : mPrevWordsInfo) {
|
||||
if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) {
|
||||
break;
|
||||
}
|
||||
hashValue ^= wordInfo.hashCode();
|
||||
}
|
||||
return hashValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -0,0 +1,26 @@
|
|||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.latin.NgramContext;
|
||||
|
||||
public class NgramProperty {
|
||||
public final WeightedString mTargetWord;
|
||||
public final NgramContext mNgramContext;
|
||||
|
||||
public NgramProperty(final WeightedString targetWord, final NgramContext ngramContext) {
|
||||
mTargetWord = targetWord;
|
||||
mNgramContext = ngramContext;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return mTargetWord.hashCode() ^ mNgramContext.hashCode();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (o == this) return true;
|
||||
if (!(o instanceof NgramProperty)) return false;
|
||||
final NgramProperty n = (NgramProperty)o;
|
||||
return mTargetWord.equals(n.mTargetWord) && mNgramContext.equals(n.mNgramContext);
|
||||
}
|
||||
}
|
|
@ -18,6 +18,8 @@ package com.android.inputmethod.latin.makedict;
|
|||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.latin.BinaryDictionary;
|
||||
import com.android.inputmethod.latin.NgramContext;
|
||||
import com.android.inputmethod.latin.NgramContext.WordInfo;
|
||||
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
|
||||
import com.android.inputmethod.latin.utils.StringUtils;
|
||||
|
||||
|
@ -33,16 +35,17 @@ public final class WordProperty implements Comparable<WordProperty> {
|
|||
public final String mWord;
|
||||
public final ProbabilityInfo mProbabilityInfo;
|
||||
public final ArrayList<WeightedString> mShortcutTargets;
|
||||
public final ArrayList<WeightedString> mBigrams;
|
||||
public final ArrayList<NgramProperty> mNgrams;
|
||||
// TODO: Support mIsBeginningOfSentence.
|
||||
public final boolean mIsBeginningOfSentence;
|
||||
public final boolean mIsNotAWord;
|
||||
public final boolean mIsBlacklistEntry;
|
||||
public final boolean mHasShortcuts;
|
||||
public final boolean mHasBigrams;
|
||||
public final boolean mHasNgrams;
|
||||
|
||||
private int mHashCode = 0;
|
||||
|
||||
// TODO: Support n-gram.
|
||||
@UsedForTesting
|
||||
public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
|
||||
final ArrayList<WeightedString> shortcutTargets,
|
||||
|
@ -51,11 +54,17 @@ public final class WordProperty implements Comparable<WordProperty> {
|
|||
mWord = word;
|
||||
mProbabilityInfo = probabilityInfo;
|
||||
mShortcutTargets = shortcutTargets;
|
||||
mBigrams = bigrams;
|
||||
mNgrams = new ArrayList<>();
|
||||
final NgramContext ngramContext = new NgramContext(new WordInfo(mWord));
|
||||
if (bigrams != null) {
|
||||
for (final WeightedString bigramTarget : bigrams) {
|
||||
mNgrams.add(new NgramProperty(bigramTarget, ngramContext));
|
||||
}
|
||||
}
|
||||
mIsBeginningOfSentence = false;
|
||||
mIsNotAWord = isNotAWord;
|
||||
mIsBlacklistEntry = isBlacklistEntry;
|
||||
mHasBigrams = bigrams != null && !bigrams.isEmpty();
|
||||
mHasNgrams = bigrams != null && !bigrams.isEmpty();
|
||||
mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
|
||||
}
|
||||
|
||||
|
@ -78,19 +87,24 @@ public final class WordProperty implements Comparable<WordProperty> {
|
|||
mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
|
||||
mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
|
||||
mShortcutTargets = new ArrayList<>();
|
||||
mBigrams = new ArrayList<>();
|
||||
mNgrams = new ArrayList<>();
|
||||
mIsBeginningOfSentence = isBeginningOfSentence;
|
||||
mIsNotAWord = isNotAWord;
|
||||
mIsBlacklistEntry = isBlacklisted;
|
||||
mHasShortcuts = hasShortcuts;
|
||||
mHasBigrams = hasBigram;
|
||||
mHasNgrams = hasBigram;
|
||||
|
||||
final int bigramTargetCount = bigramTargets.size();
|
||||
for (int i = 0; i < bigramTargetCount; i++) {
|
||||
final String bigramTargetString =
|
||||
final int relatedNgramCount = bigramTargets.size();
|
||||
final WordInfo currentWordInfo =
|
||||
mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE : new WordInfo(mWord);
|
||||
final NgramContext ngramContext = new NgramContext(currentWordInfo);
|
||||
for (int i = 0; i < relatedNgramCount; i++) {
|
||||
final String ngramTargetString =
|
||||
StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
|
||||
mBigrams.add(new WeightedString(bigramTargetString,
|
||||
createProbabilityInfoFromArray(bigramProbabilityInfo.get(i))));
|
||||
final WeightedString ngramTarget = new WeightedString(ngramTargetString,
|
||||
createProbabilityInfoFromArray(bigramProbabilityInfo.get(i)));
|
||||
// TODO: Support n-gram.
|
||||
mNgrams.add(new NgramProperty(ngramTarget, ngramContext));
|
||||
}
|
||||
|
||||
final int shortcutTargetCount = shortcutTargets.size();
|
||||
|
@ -102,6 +116,17 @@ public final class WordProperty implements Comparable<WordProperty> {
|
|||
}
|
||||
}
|
||||
|
||||
// TODO: Remove
|
||||
public ArrayList<WeightedString> getBigrams() {
|
||||
final ArrayList<WeightedString> bigrams = new ArrayList<>();
|
||||
for (final NgramProperty ngram : mNgrams) {
|
||||
if (ngram.mNgramContext.getPrevWordCount() == 1) {
|
||||
bigrams.add(ngram.mTargetWord);
|
||||
}
|
||||
}
|
||||
return bigrams;
|
||||
}
|
||||
|
||||
public int getProbability() {
|
||||
return mProbabilityInfo.mProbability;
|
||||
}
|
||||
|
@ -110,8 +135,8 @@ public final class WordProperty implements Comparable<WordProperty> {
|
|||
return Arrays.hashCode(new Object[] {
|
||||
word.mWord,
|
||||
word.mProbabilityInfo,
|
||||
word.mShortcutTargets.hashCode(),
|
||||
word.mBigrams.hashCode(),
|
||||
word.mShortcutTargets,
|
||||
word.mNgrams,
|
||||
word.mIsNotAWord,
|
||||
word.mIsBlacklistEntry
|
||||
});
|
||||
|
@ -142,9 +167,9 @@ public final class WordProperty implements Comparable<WordProperty> {
|
|||
if (!(o instanceof WordProperty)) return false;
|
||||
WordProperty w = (WordProperty)o;
|
||||
return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
|
||||
&& mShortcutTargets.equals(w.mShortcutTargets) && mBigrams.equals(w.mBigrams)
|
||||
&& mShortcutTargets.equals(w.mShortcutTargets) && mNgrams.equals(w.mNgrams)
|
||||
&& mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry
|
||||
&& mHasBigrams == w.mHasBigrams && mHasShortcuts && w.mHasBigrams;
|
||||
&& mHasNgrams == w.mHasNgrams && mHasShortcuts && w.mHasNgrams;
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
@ -67,7 +67,7 @@ public class CombinedFormatUtils {
|
|||
builder.append("," + BLACKLISTED_TAG + "=true");
|
||||
}
|
||||
builder.append("\n");
|
||||
if (wordProperty.mShortcutTargets != null) {
|
||||
if (wordProperty.mHasShortcuts) {
|
||||
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
|
||||
builder.append(" " + SHORTCUT_TAG + "=" + shortcutTarget.mWord);
|
||||
builder.append(",");
|
||||
|
@ -75,8 +75,9 @@ public class CombinedFormatUtils {
|
|||
builder.append("\n");
|
||||
}
|
||||
}
|
||||
if (wordProperty.mBigrams != null) {
|
||||
for (final WeightedString bigram : wordProperty.mBigrams) {
|
||||
if (wordProperty.mHasNgrams) {
|
||||
// TODO: Support ngram.
|
||||
for (final WeightedString bigram : wordProperty.getBigrams()) {
|
||||
builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord);
|
||||
builder.append(",");
|
||||
builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));
|
||||
|
|
|
@ -1105,7 +1105,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
assertTrue(wordProperty.isValid());
|
||||
assertEquals(isNotAWord, wordProperty.mIsNotAWord);
|
||||
assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
|
||||
assertEquals(false, wordProperty.mHasBigrams);
|
||||
assertEquals(false, wordProperty.mHasNgrams);
|
||||
assertEquals(false, wordProperty.mHasShortcuts);
|
||||
assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
|
||||
assertTrue(wordProperty.mShortcutTargets.isEmpty());
|
||||
|
@ -1142,13 +1142,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
final HashSet<String> bigramWord1s = bigrams.get(word0);
|
||||
final WordProperty wordProperty = binaryDictionary.getWordProperty(word0,
|
||||
false /* isBeginningOfSentence */);
|
||||
assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
|
||||
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
|
||||
final String word1 = wordProperty.mBigrams.get(j).mWord;
|
||||
assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size());
|
||||
// TODO: Support ngram.
|
||||
for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
|
||||
final String word1 = bigramTarget.mWord;
|
||||
assertTrue(bigramWord1s.contains(word1));
|
||||
if (canCheckBigramProbability(formatVersion)) {
|
||||
final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
|
||||
assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
|
||||
assertEquals(bigramProbability, bigramTarget.getProbability());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1235,13 +1236,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
|||
wordProperty.mProbabilityInfo.mProbability);
|
||||
wordSet.remove(word0);
|
||||
final HashSet<String> bigramWord1s = bigrams.get(word0);
|
||||
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
|
||||
final String word1 = wordProperty.mBigrams.get(j).mWord;
|
||||
// TODO: Support ngram.
|
||||
for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
|
||||
final String word1 = bigramTarget.mWord;
|
||||
assertTrue(bigramWord1s.contains(word1));
|
||||
final Pair<String, String> bigram = new Pair<>(word0, word1);
|
||||
if (canCheckBigramProbability(formatVersion)) {
|
||||
final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
|
||||
assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
|
||||
assertEquals(bigramProbability, bigramTarget.getProbability());
|
||||
}
|
||||
bigramSet.remove(bigram);
|
||||
}
|
||||
|
|
|
@ -682,8 +682,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
}
|
||||
assertTrue(shortcutList.isEmpty());
|
||||
}
|
||||
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
|
||||
final String word1 = wordProperty.mBigrams.get(j).mWord;
|
||||
for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
|
||||
final String word1 = bigramTarget.mWord;
|
||||
final Pair<String, String> bigram = new Pair<>(word0, word1);
|
||||
assertTrue(bigramSet.contains(bigram));
|
||||
bigramSet.remove(bigram);
|
||||
|
|
|
@ -292,11 +292,11 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
|
|||
}
|
||||
// Insert bigrams into the fusion dictionary.
|
||||
for (final WordProperty wordProperty : wordProperties) {
|
||||
if (wordProperty.mBigrams == null) {
|
||||
if (!wordProperty.mHasNgrams) {
|
||||
continue;
|
||||
}
|
||||
final String word0 = wordProperty.mWord;
|
||||
for (final WeightedString bigram : wordProperty.mBigrams) {
|
||||
for (final WeightedString bigram : wordProperty.getBigrams()) {
|
||||
fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -97,12 +97,13 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
|
|||
}
|
||||
}
|
||||
// Insert bigrams into the fusion dictionary.
|
||||
// TODO: Support ngrams.
|
||||
for (final WordProperty wordProperty : wordProperties) {
|
||||
if (wordProperty.mBigrams == null) {
|
||||
if (!wordProperty.mHasNgrams) {
|
||||
continue;
|
||||
}
|
||||
final String word0 = wordProperty.mWord;
|
||||
for (final WeightedString bigram : wordProperty.mBigrams) {
|
||||
for (final WeightedString bigram : wordProperty.getBigrams()) {
|
||||
fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -102,8 +102,9 @@ public class Ver4DictEncoder implements DictEncoder {
|
|||
}
|
||||
}
|
||||
for (final WordProperty word0Property : dict) {
|
||||
if (null == word0Property.mBigrams) continue;
|
||||
for (final WeightedString word1 : word0Property.mBigrams) {
|
||||
if (!word0Property.mHasNgrams) continue;
|
||||
// TODO: Support ngram.
|
||||
for (final WeightedString word1 : word0Property.getBigrams()) {
|
||||
final NgramContext ngramContext =
|
||||
new NgramContext(new NgramContext.WordInfo(word0Property.mWord));
|
||||
if (!binaryDict.addNgramEntry(ngramContext, word1.mWord,
|
||||
|
|
|
@ -135,7 +135,7 @@ public class Diff extends Dicttool.Command {
|
|||
hasDifferences = true;
|
||||
}
|
||||
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
|
||||
"Bigram", word0Property.mBigrams, word1PtNode.getBigrams());
|
||||
"Bigram", word0Property.getBigrams(), word1PtNode.getBigrams());
|
||||
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
|
||||
"Shortcut", word0Property.mShortcutTargets,
|
||||
word1PtNode.getShortcutTargets());
|
||||
|
|
|
@ -45,8 +45,8 @@ public class Info extends Dicttool.Command {
|
|||
int whitelistCount = 0;
|
||||
for (final WordProperty wordProperty : dict) {
|
||||
++wordCount;
|
||||
if (null != wordProperty.mBigrams) {
|
||||
bigramCount += wordProperty.mBigrams.size();
|
||||
if (wordProperty.mHasNgrams) {
|
||||
bigramCount += wordProperty.mNgrams.size();
|
||||
}
|
||||
if (null != wordProperty.mShortcutTargets) {
|
||||
shortcutCount += wordProperty.mShortcutTargets.size();
|
||||
|
|
|
@ -353,7 +353,7 @@ public class XmlDictInputOutput {
|
|||
+ "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability()
|
||||
+ (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "")
|
||||
+ "\">");
|
||||
if (null != wordProperty.mShortcutTargets) {
|
||||
if (wordProperty.mHasShortcuts) {
|
||||
destination.write("\n");
|
||||
for (WeightedString target : wordProperty.mShortcutTargets) {
|
||||
destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\""
|
||||
|
@ -362,9 +362,9 @@ public class XmlDictInputOutput {
|
|||
}
|
||||
destination.write(" ");
|
||||
}
|
||||
if (null != wordProperty.mBigrams) {
|
||||
if (wordProperty.mHasNgrams) {
|
||||
destination.write("\n");
|
||||
for (WeightedString bigram : wordProperty.mBigrams) {
|
||||
for (WeightedString bigram : wordProperty.getBigrams()) {
|
||||
destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\""
|
||||
+ bigram.getProbability() + "\">" + bigram.mWord
|
||||
+ "</" + BIGRAM_TAG + ">\n");
|
||||
|
|
Loading…
Reference in a new issue