Merge "Introduce NgramProperty in Java side."

This commit is contained in:
Keisuke Kuroyanagi 2014-10-01 07:53:03 +00:00 committed by Android (Google) Code Review
commit 108dad1491
12 changed files with 104 additions and 42 deletions

View file

@ -169,8 +169,14 @@ public class NgramContext {
@Override
public int hashCode() {
// Just for having equals().
return mPrevWordsInfo[0].hashCode();
int hashValue = 0;
for (final WordInfo wordInfo : mPrevWordsInfo) {
if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) {
break;
}
hashValue ^= wordInfo.hashCode();
}
return hashValue;
}
@Override

View file

@ -0,0 +1,26 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.NgramContext;
public class NgramProperty {
public final WeightedString mTargetWord;
public final NgramContext mNgramContext;
public NgramProperty(final WeightedString targetWord, final NgramContext ngramContext) {
mTargetWord = targetWord;
mNgramContext = ngramContext;
}
@Override
public int hashCode() {
return mTargetWord.hashCode() ^ mNgramContext.hashCode();
}
@Override
public boolean equals(Object o) {
if (o == this) return true;
if (!(o instanceof NgramProperty)) return false;
final NgramProperty n = (NgramProperty)o;
return mTargetWord.equals(n.mTargetWord) && mNgramContext.equals(n.mNgramContext);
}
}

View file

@ -18,6 +18,8 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.NgramContext;
import com.android.inputmethod.latin.NgramContext.WordInfo;
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import com.android.inputmethod.latin.utils.StringUtils;
@ -33,16 +35,17 @@ public final class WordProperty implements Comparable<WordProperty> {
public final String mWord;
public final ProbabilityInfo mProbabilityInfo;
public final ArrayList<WeightedString> mShortcutTargets;
public final ArrayList<WeightedString> mBigrams;
public final ArrayList<NgramProperty> mNgrams;
// TODO: Support mIsBeginningOfSentence.
public final boolean mIsBeginningOfSentence;
public final boolean mIsNotAWord;
public final boolean mIsBlacklistEntry;
public final boolean mHasShortcuts;
public final boolean mHasBigrams;
public final boolean mHasNgrams;
private int mHashCode = 0;
// TODO: Support n-gram.
@UsedForTesting
public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
final ArrayList<WeightedString> shortcutTargets,
@ -51,11 +54,17 @@ public final class WordProperty implements Comparable<WordProperty> {
mWord = word;
mProbabilityInfo = probabilityInfo;
mShortcutTargets = shortcutTargets;
mBigrams = bigrams;
mNgrams = new ArrayList<>();
final NgramContext ngramContext = new NgramContext(new WordInfo(mWord));
if (bigrams != null) {
for (final WeightedString bigramTarget : bigrams) {
mNgrams.add(new NgramProperty(bigramTarget, ngramContext));
}
}
mIsBeginningOfSentence = false;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry;
mHasBigrams = bigrams != null && !bigrams.isEmpty();
mHasNgrams = bigrams != null && !bigrams.isEmpty();
mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
}
@ -78,19 +87,24 @@ public final class WordProperty implements Comparable<WordProperty> {
mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
mShortcutTargets = new ArrayList<>();
mBigrams = new ArrayList<>();
mNgrams = new ArrayList<>();
mIsBeginningOfSentence = isBeginningOfSentence;
mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklisted;
mHasShortcuts = hasShortcuts;
mHasBigrams = hasBigram;
mHasNgrams = hasBigram;
final int bigramTargetCount = bigramTargets.size();
for (int i = 0; i < bigramTargetCount; i++) {
final String bigramTargetString =
final int relatedNgramCount = bigramTargets.size();
final WordInfo currentWordInfo =
mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE : new WordInfo(mWord);
final NgramContext ngramContext = new NgramContext(currentWordInfo);
for (int i = 0; i < relatedNgramCount; i++) {
final String ngramTargetString =
StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
mBigrams.add(new WeightedString(bigramTargetString,
createProbabilityInfoFromArray(bigramProbabilityInfo.get(i))));
final WeightedString ngramTarget = new WeightedString(ngramTargetString,
createProbabilityInfoFromArray(bigramProbabilityInfo.get(i)));
// TODO: Support n-gram.
mNgrams.add(new NgramProperty(ngramTarget, ngramContext));
}
final int shortcutTargetCount = shortcutTargets.size();
@ -102,6 +116,17 @@ public final class WordProperty implements Comparable<WordProperty> {
}
}
// TODO: Remove
public ArrayList<WeightedString> getBigrams() {
final ArrayList<WeightedString> bigrams = new ArrayList<>();
for (final NgramProperty ngram : mNgrams) {
if (ngram.mNgramContext.getPrevWordCount() == 1) {
bigrams.add(ngram.mTargetWord);
}
}
return bigrams;
}
public int getProbability() {
return mProbabilityInfo.mProbability;
}
@ -110,8 +135,8 @@ public final class WordProperty implements Comparable<WordProperty> {
return Arrays.hashCode(new Object[] {
word.mWord,
word.mProbabilityInfo,
word.mShortcutTargets.hashCode(),
word.mBigrams.hashCode(),
word.mShortcutTargets,
word.mNgrams,
word.mIsNotAWord,
word.mIsBlacklistEntry
});
@ -142,9 +167,9 @@ public final class WordProperty implements Comparable<WordProperty> {
if (!(o instanceof WordProperty)) return false;
WordProperty w = (WordProperty)o;
return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
&& mShortcutTargets.equals(w.mShortcutTargets) && mBigrams.equals(w.mBigrams)
&& mShortcutTargets.equals(w.mShortcutTargets) && mNgrams.equals(w.mNgrams)
&& mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry
&& mHasBigrams == w.mHasBigrams && mHasShortcuts && w.mHasBigrams;
&& mHasNgrams == w.mHasNgrams && mHasShortcuts && w.mHasNgrams;
}
@Override

View file

@ -67,7 +67,7 @@ public class CombinedFormatUtils {
builder.append("," + BLACKLISTED_TAG + "=true");
}
builder.append("\n");
if (wordProperty.mShortcutTargets != null) {
if (wordProperty.mHasShortcuts) {
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
builder.append(" " + SHORTCUT_TAG + "=" + shortcutTarget.mWord);
builder.append(",");
@ -75,8 +75,9 @@ public class CombinedFormatUtils {
builder.append("\n");
}
}
if (wordProperty.mBigrams != null) {
for (final WeightedString bigram : wordProperty.mBigrams) {
if (wordProperty.mHasNgrams) {
// TODO: Support ngram.
for (final WeightedString bigram : wordProperty.getBigrams()) {
builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord);
builder.append(",");
builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));

View file

@ -1105,7 +1105,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertTrue(wordProperty.isValid());
assertEquals(isNotAWord, wordProperty.mIsNotAWord);
assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
assertEquals(false, wordProperty.mHasBigrams);
assertEquals(false, wordProperty.mHasNgrams);
assertEquals(false, wordProperty.mHasShortcuts);
assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
assertTrue(wordProperty.mShortcutTargets.isEmpty());
@ -1142,13 +1142,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final HashSet<String> bigramWord1s = bigrams.get(word0);
final WordProperty wordProperty = binaryDictionary.getWordProperty(word0,
false /* isBeginningOfSentence */);
assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
final String word1 = wordProperty.mBigrams.get(j).mWord;
assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size());
// TODO: Support ngram.
for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
final String word1 = bigramTarget.mWord;
assertTrue(bigramWord1s.contains(word1));
if (canCheckBigramProbability(formatVersion)) {
final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
assertEquals(bigramProbability, bigramTarget.getProbability());
}
}
}
@ -1235,13 +1236,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
wordProperty.mProbabilityInfo.mProbability);
wordSet.remove(word0);
final HashSet<String> bigramWord1s = bigrams.get(word0);
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
final String word1 = wordProperty.mBigrams.get(j).mWord;
// TODO: Support ngram.
for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
final String word1 = bigramTarget.mWord;
assertTrue(bigramWord1s.contains(word1));
final Pair<String, String> bigram = new Pair<>(word0, word1);
if (canCheckBigramProbability(formatVersion)) {
final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability());
assertEquals(bigramProbability, bigramTarget.getProbability());
}
bigramSet.remove(bigram);
}

View file

@ -682,8 +682,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
assertTrue(shortcutList.isEmpty());
}
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
final String word1 = wordProperty.mBigrams.get(j).mWord;
for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
final String word1 = bigramTarget.mWord;
final Pair<String, String> bigram = new Pair<>(word0, word1);
assertTrue(bigramSet.contains(bigram));
bigramSet.remove(bigram);

View file

@ -292,11 +292,11 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
}
// Insert bigrams into the fusion dictionary.
for (final WordProperty wordProperty : wordProperties) {
if (wordProperty.mBigrams == null) {
if (!wordProperty.mHasNgrams) {
continue;
}
final String word0 = wordProperty.mWord;
for (final WeightedString bigram : wordProperty.mBigrams) {
for (final WeightedString bigram : wordProperty.getBigrams()) {
fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
}
}

View file

@ -97,12 +97,13 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
}
}
// Insert bigrams into the fusion dictionary.
// TODO: Support ngrams.
for (final WordProperty wordProperty : wordProperties) {
if (wordProperty.mBigrams == null) {
if (!wordProperty.mHasNgrams) {
continue;
}
final String word0 = wordProperty.mWord;
for (final WeightedString bigram : wordProperty.mBigrams) {
for (final WeightedString bigram : wordProperty.getBigrams()) {
fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
}
}

View file

@ -102,8 +102,9 @@ public class Ver4DictEncoder implements DictEncoder {
}
}
for (final WordProperty word0Property : dict) {
if (null == word0Property.mBigrams) continue;
for (final WeightedString word1 : word0Property.mBigrams) {
if (!word0Property.mHasNgrams) continue;
// TODO: Support ngram.
for (final WeightedString word1 : word0Property.getBigrams()) {
final NgramContext ngramContext =
new NgramContext(new NgramContext.WordInfo(word0Property.mWord));
if (!binaryDict.addNgramEntry(ngramContext, word1.mWord,

View file

@ -135,7 +135,7 @@ public class Diff extends Dicttool.Command {
hasDifferences = true;
}
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
"Bigram", word0Property.mBigrams, word1PtNode.getBigrams());
"Bigram", word0Property.getBigrams(), word1PtNode.getBigrams());
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
"Shortcut", word0Property.mShortcutTargets,
word1PtNode.getShortcutTargets());

View file

@ -45,8 +45,8 @@ public class Info extends Dicttool.Command {
int whitelistCount = 0;
for (final WordProperty wordProperty : dict) {
++wordCount;
if (null != wordProperty.mBigrams) {
bigramCount += wordProperty.mBigrams.size();
if (wordProperty.mHasNgrams) {
bigramCount += wordProperty.mNgrams.size();
}
if (null != wordProperty.mShortcutTargets) {
shortcutCount += wordProperty.mShortcutTargets.size();

View file

@ -353,7 +353,7 @@ public class XmlDictInputOutput {
+ "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability()
+ (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "")
+ "\">");
if (null != wordProperty.mShortcutTargets) {
if (wordProperty.mHasShortcuts) {
destination.write("\n");
for (WeightedString target : wordProperty.mShortcutTargets) {
destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\""
@ -362,9 +362,9 @@ public class XmlDictInputOutput {
}
destination.write(" ");
}
if (null != wordProperty.mBigrams) {
if (wordProperty.mHasNgrams) {
destination.write("\n");
for (WeightedString bigram : wordProperty.mBigrams) {
for (WeightedString bigram : wordProperty.getBigrams()) {
destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\""
+ bigram.getProbability() + "\">" + bigram.mWord
+ "</" + BIGRAM_TAG + ">\n");