Introduce NgramProperty on the Java side.

Bug: 14425059
Change-Id: I8b3458ad22730b3dccbe0caea2c5930f5276dc82
This commit is contained in:
Keisuke Kuroyanagi 2014-10-01 11:21:08 +09:00
parent 79bb37d499
commit c6a6f6a990
12 changed files with 104 additions and 42 deletions

View file

@ -169,8 +169,14 @@ public class NgramContext {
@Override @Override
public int hashCode() { public int hashCode() {
// Just for having equals(). int hashValue = 0;
return mPrevWordsInfo[0].hashCode(); for (final WordInfo wordInfo : mPrevWordsInfo) {
if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) {
break;
}
hashValue ^= wordInfo.hashCode();
}
return hashValue;
} }
@Override @Override

View file

@ -0,0 +1,26 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.NgramContext;
/**
 * Pairs an n-gram target word with the n-gram context it belongs to.
 *
 * Both fields are assigned once in the constructor and never reassigned. Equality and hashing
 * are defined purely in terms of the two fields.
 */
public class NgramProperty {
    /** The target word of this n-gram entry. */
    public final WeightedString mTargetWord;
    /** The context associated with {@link #mTargetWord}. */
    public final NgramContext mNgramContext;

    /**
     * @param targetWord the target word of the n-gram entry
     * @param ngramContext the context the target word is associated with
     */
    public NgramProperty(final WeightedString targetWord, final NgramContext ngramContext) {
        mNgramContext = ngramContext;
        mTargetWord = targetWord;
    }

    /**
     * Combines the hash codes of the target word and of the context with XOR.
     */
    @Override
    public int hashCode() {
        final int targetHash = mTargetWord.hashCode();
        final int contextHash = mNgramContext.hashCode();
        return targetHash ^ contextHash;
    }

    /**
     * Two instances are equal when both their target words and their contexts are equal.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o instanceof NgramProperty) {
            final NgramProperty other = (NgramProperty) o;
            // Compare the target word first; the context is only consulted when it matches.
            if (!mTargetWord.equals(other.mTargetWord)) {
                return false;
            }
            return mNgramContext.equals(other.mNgramContext);
        }
        return false;
    }
}

View file

@ -18,6 +18,8 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.NgramContext;
import com.android.inputmethod.latin.NgramContext.WordInfo;
import com.android.inputmethod.latin.utils.CombinedFormatUtils; import com.android.inputmethod.latin.utils.CombinedFormatUtils;
import com.android.inputmethod.latin.utils.StringUtils; import com.android.inputmethod.latin.utils.StringUtils;
@ -33,16 +35,17 @@ public final class WordProperty implements Comparable<WordProperty> {
public final String mWord; public final String mWord;
public final ProbabilityInfo mProbabilityInfo; public final ProbabilityInfo mProbabilityInfo;
public final ArrayList<WeightedString> mShortcutTargets; public final ArrayList<WeightedString> mShortcutTargets;
public final ArrayList<WeightedString> mBigrams; public final ArrayList<NgramProperty> mNgrams;
// TODO: Support mIsBeginningOfSentence. // TODO: Support mIsBeginningOfSentence.
public final boolean mIsBeginningOfSentence; public final boolean mIsBeginningOfSentence;
public final boolean mIsNotAWord; public final boolean mIsNotAWord;
public final boolean mIsBlacklistEntry; public final boolean mIsBlacklistEntry;
public final boolean mHasShortcuts; public final boolean mHasShortcuts;
public final boolean mHasBigrams; public final boolean mHasNgrams;
private int mHashCode = 0; private int mHashCode = 0;
// TODO: Support n-gram.
@UsedForTesting @UsedForTesting
public WordProperty(final String word, final ProbabilityInfo probabilityInfo, public WordProperty(final String word, final ProbabilityInfo probabilityInfo,
final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> shortcutTargets,
@ -51,11 +54,17 @@ public final class WordProperty implements Comparable<WordProperty> {
mWord = word; mWord = word;
mProbabilityInfo = probabilityInfo; mProbabilityInfo = probabilityInfo;
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
mBigrams = bigrams; mNgrams = new ArrayList<>();
final NgramContext ngramContext = new NgramContext(new WordInfo(mWord));
if (bigrams != null) {
for (final WeightedString bigramTarget : bigrams) {
mNgrams.add(new NgramProperty(bigramTarget, ngramContext));
}
}
mIsBeginningOfSentence = false; mIsBeginningOfSentence = false;
mIsNotAWord = isNotAWord; mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklistEntry; mIsBlacklistEntry = isBlacklistEntry;
mHasBigrams = bigrams != null && !bigrams.isEmpty(); mHasNgrams = bigrams != null && !bigrams.isEmpty();
mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty(); mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty();
} }
@ -78,19 +87,24 @@ public final class WordProperty implements Comparable<WordProperty> {
mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo); mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
mShortcutTargets = new ArrayList<>(); mShortcutTargets = new ArrayList<>();
mBigrams = new ArrayList<>(); mNgrams = new ArrayList<>();
mIsBeginningOfSentence = isBeginningOfSentence; mIsBeginningOfSentence = isBeginningOfSentence;
mIsNotAWord = isNotAWord; mIsNotAWord = isNotAWord;
mIsBlacklistEntry = isBlacklisted; mIsBlacklistEntry = isBlacklisted;
mHasShortcuts = hasShortcuts; mHasShortcuts = hasShortcuts;
mHasBigrams = hasBigram; mHasNgrams = hasBigram;
final int bigramTargetCount = bigramTargets.size(); final int relatedNgramCount = bigramTargets.size();
for (int i = 0; i < bigramTargetCount; i++) { final WordInfo currentWordInfo =
final String bigramTargetString = mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE : new WordInfo(mWord);
final NgramContext ngramContext = new NgramContext(currentWordInfo);
for (int i = 0; i < relatedNgramCount; i++) {
final String ngramTargetString =
StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i)); StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
mBigrams.add(new WeightedString(bigramTargetString, final WeightedString ngramTarget = new WeightedString(ngramTargetString,
createProbabilityInfoFromArray(bigramProbabilityInfo.get(i)))); createProbabilityInfoFromArray(bigramProbabilityInfo.get(i)));
// TODO: Support n-gram.
mNgrams.add(new NgramProperty(ngramTarget, ngramContext));
} }
final int shortcutTargetCount = shortcutTargets.size(); final int shortcutTargetCount = shortcutTargets.size();
@ -102,6 +116,17 @@ public final class WordProperty implements Comparable<WordProperty> {
} }
} }
// TODO: Remove
/**
 * Collects the target words of the entries in {@code mNgrams} whose context reports exactly
 * one previous word.
 *
 * @return a newly allocated list, built on every call, in the iteration order of
 *         {@code mNgrams}
 */
public ArrayList<WeightedString> getBigrams() {
    final ArrayList<WeightedString> result = new ArrayList<>();
    for (final NgramProperty entry : mNgrams) {
        // Skip every entry whose context does not hold exactly one previous word.
        if (entry.mNgramContext.getPrevWordCount() != 1) {
            continue;
        }
        result.add(entry.mTargetWord);
    }
    return result;
}
public int getProbability() { public int getProbability() {
return mProbabilityInfo.mProbability; return mProbabilityInfo.mProbability;
} }
@ -110,8 +135,8 @@ public final class WordProperty implements Comparable<WordProperty> {
return Arrays.hashCode(new Object[] { return Arrays.hashCode(new Object[] {
word.mWord, word.mWord,
word.mProbabilityInfo, word.mProbabilityInfo,
word.mShortcutTargets.hashCode(), word.mShortcutTargets,
word.mBigrams.hashCode(), word.mNgrams,
word.mIsNotAWord, word.mIsNotAWord,
word.mIsBlacklistEntry word.mIsBlacklistEntry
}); });
@ -142,9 +167,9 @@ public final class WordProperty implements Comparable<WordProperty> {
if (!(o instanceof WordProperty)) return false; if (!(o instanceof WordProperty)) return false;
WordProperty w = (WordProperty)o; WordProperty w = (WordProperty)o;
return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord) return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
&& mShortcutTargets.equals(w.mShortcutTargets) && mBigrams.equals(w.mBigrams) && mShortcutTargets.equals(w.mShortcutTargets) && mNgrams.equals(w.mNgrams)
&& mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry && mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry
&& mHasBigrams == w.mHasBigrams && mHasShortcuts && w.mHasBigrams; && mHasNgrams == w.mHasNgrams && mHasShortcuts && w.mHasNgrams;
} }
@Override @Override

View file

@ -67,7 +67,7 @@ public class CombinedFormatUtils {
builder.append("," + BLACKLISTED_TAG + "=true"); builder.append("," + BLACKLISTED_TAG + "=true");
} }
builder.append("\n"); builder.append("\n");
if (wordProperty.mShortcutTargets != null) { if (wordProperty.mHasShortcuts) {
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
builder.append(" " + SHORTCUT_TAG + "=" + shortcutTarget.mWord); builder.append(" " + SHORTCUT_TAG + "=" + shortcutTarget.mWord);
builder.append(","); builder.append(",");
@ -75,8 +75,9 @@ public class CombinedFormatUtils {
builder.append("\n"); builder.append("\n");
} }
} }
if (wordProperty.mBigrams != null) { if (wordProperty.mHasNgrams) {
for (final WeightedString bigram : wordProperty.mBigrams) { // TODO: Support ngram.
for (final WeightedString bigram : wordProperty.getBigrams()) {
builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord); builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord);
builder.append(","); builder.append(",");
builder.append(formatProbabilityInfo(bigram.mProbabilityInfo)); builder.append(formatProbabilityInfo(bigram.mProbabilityInfo));

View file

@ -1105,7 +1105,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertTrue(wordProperty.isValid()); assertTrue(wordProperty.isValid());
assertEquals(isNotAWord, wordProperty.mIsNotAWord); assertEquals(isNotAWord, wordProperty.mIsNotAWord);
assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry); assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
assertEquals(false, wordProperty.mHasBigrams); assertEquals(false, wordProperty.mHasNgrams);
assertEquals(false, wordProperty.mHasShortcuts); assertEquals(false, wordProperty.mHasShortcuts);
assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
assertTrue(wordProperty.mShortcutTargets.isEmpty()); assertTrue(wordProperty.mShortcutTargets.isEmpty());
@ -1142,13 +1142,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
final HashSet<String> bigramWord1s = bigrams.get(word0); final HashSet<String> bigramWord1s = bigrams.get(word0);
final WordProperty wordProperty = binaryDictionary.getWordProperty(word0, final WordProperty wordProperty = binaryDictionary.getWordProperty(word0,
false /* isBeginningOfSentence */); false /* isBeginningOfSentence */);
assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size()); assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size());
for (int j = 0; j < wordProperty.mBigrams.size(); j++) { // TODO: Support ngram.
final String word1 = wordProperty.mBigrams.get(j).mWord; for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
final String word1 = bigramTarget.mWord;
assertTrue(bigramWord1s.contains(word1)); assertTrue(bigramWord1s.contains(word1));
if (canCheckBigramProbability(formatVersion)) { if (canCheckBigramProbability(formatVersion)) {
final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1)); final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1));
assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); assertEquals(bigramProbability, bigramTarget.getProbability());
} }
} }
} }
@ -1235,13 +1236,14 @@ public class BinaryDictionaryTests extends AndroidTestCase {
wordProperty.mProbabilityInfo.mProbability); wordProperty.mProbabilityInfo.mProbability);
wordSet.remove(word0); wordSet.remove(word0);
final HashSet<String> bigramWord1s = bigrams.get(word0); final HashSet<String> bigramWord1s = bigrams.get(word0);
for (int j = 0; j < wordProperty.mBigrams.size(); j++) { // TODO: Support ngram.
final String word1 = wordProperty.mBigrams.get(j).mWord; for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
final String word1 = bigramTarget.mWord;
assertTrue(bigramWord1s.contains(word1)); assertTrue(bigramWord1s.contains(word1));
final Pair<String, String> bigram = new Pair<>(word0, word1); final Pair<String, String> bigram = new Pair<>(word0, word1);
if (canCheckBigramProbability(formatVersion)) { if (canCheckBigramProbability(formatVersion)) {
final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram); final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram);
assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); assertEquals(bigramProbability, bigramTarget.getProbability());
} }
bigramSet.remove(bigram); bigramSet.remove(bigram);
} }

View file

@ -682,8 +682,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} }
assertTrue(shortcutList.isEmpty()); assertTrue(shortcutList.isEmpty());
} }
for (int j = 0; j < wordProperty.mBigrams.size(); j++) { for (final WeightedString bigramTarget : wordProperty.getBigrams()) {
final String word1 = wordProperty.mBigrams.get(j).mWord; final String word1 = bigramTarget.mWord;
final Pair<String, String> bigram = new Pair<>(word0, word1); final Pair<String, String> bigram = new Pair<>(word0, word1);
assertTrue(bigramSet.contains(bigram)); assertTrue(bigramSet.contains(bigram));
bigramSet.remove(bigram); bigramSet.remove(bigram);

View file

@ -292,11 +292,11 @@ public class Ver2DictDecoder extends AbstractDictDecoder {
} }
// Insert bigrams into the fusion dictionary. // Insert bigrams into the fusion dictionary.
for (final WordProperty wordProperty : wordProperties) { for (final WordProperty wordProperty : wordProperties) {
if (wordProperty.mBigrams == null) { if (!wordProperty.mHasNgrams) {
continue; continue;
} }
final String word0 = wordProperty.mWord; final String word0 = wordProperty.mWord;
for (final WeightedString bigram : wordProperty.mBigrams) { for (final WeightedString bigram : wordProperty.getBigrams()) {
fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo); fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
} }
} }

View file

@ -97,12 +97,13 @@ public class Ver4DictDecoder extends AbstractDictDecoder {
} }
} }
// Insert bigrams into the fusion dictionary. // Insert bigrams into the fusion dictionary.
// TODO: Support ngrams.
for (final WordProperty wordProperty : wordProperties) { for (final WordProperty wordProperty : wordProperties) {
if (wordProperty.mBigrams == null) { if (!wordProperty.mHasNgrams) {
continue; continue;
} }
final String word0 = wordProperty.mWord; final String word0 = wordProperty.mWord;
for (final WeightedString bigram : wordProperty.mBigrams) { for (final WeightedString bigram : wordProperty.getBigrams()) {
fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo); fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo);
} }
} }

View file

@ -102,8 +102,9 @@ public class Ver4DictEncoder implements DictEncoder {
} }
} }
for (final WordProperty word0Property : dict) { for (final WordProperty word0Property : dict) {
if (null == word0Property.mBigrams) continue; if (!word0Property.mHasNgrams) continue;
for (final WeightedString word1 : word0Property.mBigrams) { // TODO: Support ngram.
for (final WeightedString word1 : word0Property.getBigrams()) {
final NgramContext ngramContext = final NgramContext ngramContext =
new NgramContext(new NgramContext.WordInfo(word0Property.mWord)); new NgramContext(new NgramContext.WordInfo(word0Property.mWord));
if (!binaryDict.addNgramEntry(ngramContext, word1.mWord, if (!binaryDict.addNgramEntry(ngramContext, word1.mWord,

View file

@ -135,7 +135,7 @@ public class Diff extends Dicttool.Command {
hasDifferences = true; hasDifferences = true;
} }
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
"Bigram", word0Property.mBigrams, word1PtNode.getBigrams()); "Bigram", word0Property.getBigrams(), word1PtNode.getBigrams());
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
"Shortcut", word0Property.mShortcutTargets, "Shortcut", word0Property.mShortcutTargets,
word1PtNode.getShortcutTargets()); word1PtNode.getShortcutTargets());

View file

@ -45,8 +45,8 @@ public class Info extends Dicttool.Command {
int whitelistCount = 0; int whitelistCount = 0;
for (final WordProperty wordProperty : dict) { for (final WordProperty wordProperty : dict) {
++wordCount; ++wordCount;
if (null != wordProperty.mBigrams) { if (wordProperty.mHasNgrams) {
bigramCount += wordProperty.mBigrams.size(); bigramCount += wordProperty.mNgrams.size();
} }
if (null != wordProperty.mShortcutTargets) { if (null != wordProperty.mShortcutTargets) {
shortcutCount += wordProperty.mShortcutTargets.size(); shortcutCount += wordProperty.mShortcutTargets.size();

View file

@ -353,7 +353,7 @@ public class XmlDictInputOutput {
+ "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability() + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability()
+ (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "")
+ "\">"); + "\">");
if (null != wordProperty.mShortcutTargets) { if (wordProperty.mHasShortcuts) {
destination.write("\n"); destination.write("\n");
for (WeightedString target : wordProperty.mShortcutTargets) { for (WeightedString target : wordProperty.mShortcutTargets) {
destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\"" destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\""
@ -362,9 +362,9 @@ public class XmlDictInputOutput {
} }
destination.write(" "); destination.write(" ");
} }
if (null != wordProperty.mBigrams) { if (wordProperty.mHasNgrams) {
destination.write("\n"); destination.write("\n");
for (WeightedString bigram : wordProperty.mBigrams) { for (WeightedString bigram : wordProperty.getBigrams()) {
destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\"" destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\""
+ bigram.getProbability() + "\">" + bigram.mWord + bigram.getProbability() + "\">" + bigram.mWord
+ "</" + BIGRAM_TAG + ">\n"); + "</" + BIGRAM_TAG + ">\n");