diff --git a/java/src/com/android/inputmethod/latin/NgramContext.java b/java/src/com/android/inputmethod/latin/NgramContext.java index c35c6e2c8..6d438584f 100644 --- a/java/src/com/android/inputmethod/latin/NgramContext.java +++ b/java/src/com/android/inputmethod/latin/NgramContext.java @@ -169,8 +169,14 @@ public class NgramContext { @Override public int hashCode() { - // Just for having equals(). - return mPrevWordsInfo[0].hashCode(); + int hashValue = 0; + for (final WordInfo wordInfo : mPrevWordsInfo) { + if (wordInfo == null || !WordInfo.EMPTY_WORD_INFO.equals(wordInfo)) { + break; + } + hashValue ^= wordInfo.hashCode(); + } + return hashValue; } @Override diff --git a/java/src/com/android/inputmethod/latin/makedict/NgramProperty.java b/java/src/com/android/inputmethod/latin/makedict/NgramProperty.java new file mode 100644 index 000000000..99e0e273f --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/NgramProperty.java @@ -0,0 +1,26 @@ +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.latin.NgramContext; + +public class NgramProperty { + public final WeightedString mTargetWord; + public final NgramContext mNgramContext; + + public NgramProperty(final WeightedString targetWord, final NgramContext ngramContext) { + mTargetWord = targetWord; + mNgramContext = ngramContext; + } + + @Override + public int hashCode() { + return mTargetWord.hashCode() ^ mNgramContext.hashCode(); + } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if (!(o instanceof NgramProperty)) return false; + final NgramProperty n = (NgramProperty)o; + return mTargetWord.equals(n.mTargetWord) && mNgramContext.equals(n.mNgramContext); + } +} diff --git a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java index cd78e2235..46705f9db 100644 --- a/java/src/com/android/inputmethod/latin/makedict/WordProperty.java +++ b/java/src/com/android/inputmethod/latin/makedict/WordProperty.java @@ -18,6 +18,8 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.BinaryDictionary; +import com.android.inputmethod.latin.NgramContext; +import com.android.inputmethod.latin.NgramContext.WordInfo; import com.android.inputmethod.latin.utils.CombinedFormatUtils; import com.android.inputmethod.latin.utils.StringUtils; @@ -33,16 +35,17 @@ public final class WordProperty implements Comparable { public final String mWord; public final ProbabilityInfo mProbabilityInfo; public final ArrayList mShortcutTargets; - public final ArrayList mBigrams; + public final ArrayList mNgrams; // TODO: Support mIsBeginningOfSentence. public final boolean mIsBeginningOfSentence; public final boolean mIsNotAWord; public final boolean mIsBlacklistEntry; public final boolean mHasShortcuts; - public final boolean mHasBigrams; + public final boolean mHasNgrams; private int mHashCode = 0; + // TODO: Support n-gram. @UsedForTesting public WordProperty(final String word, final ProbabilityInfo probabilityInfo, final ArrayList shortcutTargets, @@ -51,11 +54,17 @@ public final class WordProperty implements Comparable { mWord = word; mProbabilityInfo = probabilityInfo; mShortcutTargets = shortcutTargets; - mBigrams = bigrams; + mNgrams = new ArrayList<>(); + final NgramContext ngramContext = new NgramContext(new WordInfo(mWord)); + if (bigrams != null) { + for (final WeightedString bigramTarget : bigrams) { + mNgrams.add(new NgramProperty(bigramTarget, ngramContext)); + } + } mIsBeginningOfSentence = false; mIsNotAWord = isNotAWord; mIsBlacklistEntry = isBlacklistEntry; - mHasBigrams = bigrams != null && !bigrams.isEmpty(); + mHasNgrams = bigrams != null && !bigrams.isEmpty(); mHasShortcuts = shortcutTargets != null && !shortcutTargets.isEmpty(); } @@ -78,19 +87,24 @@ public final class WordProperty implements Comparable { mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints); mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo); mShortcutTargets = new ArrayList<>(); - mBigrams = new ArrayList<>(); + mNgrams = new ArrayList<>(); mIsBeginningOfSentence = isBeginningOfSentence; mIsNotAWord = isNotAWord; mIsBlacklistEntry = isBlacklisted; mHasShortcuts = hasShortcuts; - mHasBigrams = hasBigram; + mHasNgrams = hasBigram; - final int bigramTargetCount = bigramTargets.size(); - for (int i = 0; i < bigramTargetCount; i++) { - final String bigramTargetString = + final int relatedNgramCount = bigramTargets.size(); + final WordInfo currentWordInfo = + mIsBeginningOfSentence ? WordInfo.BEGINNING_OF_SENTENCE : new WordInfo(mWord); + final NgramContext ngramContext = new NgramContext(currentWordInfo); + for (int i = 0; i < relatedNgramCount; i++) { + final String ngramTargetString = StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i)); - mBigrams.add(new WeightedString(bigramTargetString, - createProbabilityInfoFromArray(bigramProbabilityInfo.get(i)))); + final WeightedString ngramTarget = new WeightedString(ngramTargetString, + createProbabilityInfoFromArray(bigramProbabilityInfo.get(i))); + // TODO: Support n-gram. + mNgrams.add(new NgramProperty(ngramTarget, ngramContext)); } final int shortcutTargetCount = shortcutTargets.size(); @@ -102,6 +116,17 @@ public final class WordProperty implements Comparable { } } + // TODO: Remove + public ArrayList getBigrams() { + final ArrayList bigrams = new ArrayList<>(); + for (final NgramProperty ngram : mNgrams) { + if (ngram.mNgramContext.getPrevWordCount() == 1) { + bigrams.add(ngram.mTargetWord); + } + } + return bigrams; + } + public int getProbability() { return mProbabilityInfo.mProbability; } @@ -110,8 +135,8 @@ public final class WordProperty implements Comparable { return Arrays.hashCode(new Object[] { word.mWord, word.mProbabilityInfo, - word.mShortcutTargets.hashCode(), - word.mBigrams.hashCode(), + word.mShortcutTargets, + word.mNgrams, word.mIsNotAWord, word.mIsBlacklistEntry }); @@ -142,9 +167,9 @@ public final class WordProperty implements Comparable { if (!(o instanceof WordProperty)) return false; WordProperty w = (WordProperty)o; return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord) - && mShortcutTargets.equals(w.mShortcutTargets) && mBigrams.equals(w.mBigrams) + && mShortcutTargets.equals(w.mShortcutTargets) && mNgrams.equals(w.mNgrams) && mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry - && mHasBigrams == w.mHasBigrams && mHasShortcuts && w.mHasBigrams; + && mHasNgrams == w.mHasNgrams && mHasShortcuts && w.mHasNgrams; } @Override diff --git a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java index 34f59e8bc..7e8e55990 100644 --- a/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/CombinedFormatUtils.java @@ -67,7 +67,7 @@ public class CombinedFormatUtils { builder.append("," + BLACKLISTED_TAG + "=true"); } builder.append("\n"); - if (wordProperty.mShortcutTargets != null) { + if (wordProperty.mHasShortcuts) { for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { builder.append(" " + SHORTCUT_TAG + "=" + shortcutTarget.mWord); builder.append(","); @@ -75,8 +75,9 @@ public class CombinedFormatUtils { builder.append("\n"); } } - if (wordProperty.mBigrams != null) { - for (final WeightedString bigram : wordProperty.mBigrams) { + if (wordProperty.mHasNgrams) { + // TODO: Support ngram. + for (final WeightedString bigram : wordProperty.getBigrams()) { builder.append(" " + BIGRAM_TAG + "=" + bigram.mWord); builder.append(","); builder.append(formatProbabilityInfo(bigram.mProbabilityInfo)); diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index e6f00b668..9c7792cf2 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -1105,7 +1105,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertTrue(wordProperty.isValid()); assertEquals(isNotAWord, wordProperty.mIsNotAWord); assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry); - assertEquals(false, wordProperty.mHasBigrams); + assertEquals(false, wordProperty.mHasNgrams); assertEquals(false, wordProperty.mHasShortcuts); assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability); assertTrue(wordProperty.mShortcutTargets.isEmpty()); @@ -1142,13 +1142,14 @@ public class BinaryDictionaryTests extends AndroidTestCase { final HashSet bigramWord1s = bigrams.get(word0); final WordProperty wordProperty = binaryDictionary.getWordProperty(word0, false /* isBeginningOfSentence */); - assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size()); - for (int j = 0; j < wordProperty.mBigrams.size(); j++) { - final String word1 = wordProperty.mBigrams.get(j).mWord; + assertEquals(bigramWord1s.size(), wordProperty.mNgrams.size()); + // TODO: Support ngram. + for (final WeightedString bigramTarget : wordProperty.getBigrams()) { + final String word1 = bigramTarget.mWord; assertTrue(bigramWord1s.contains(word1)); if (canCheckBigramProbability(formatVersion)) { final int bigramProbability = bigramProbabilities.get(new Pair<>(word0, word1)); - assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); + assertEquals(bigramProbability, bigramTarget.getProbability()); } } } @@ -1235,13 +1236,14 @@ public class BinaryDictionaryTests extends AndroidTestCase { wordProperty.mProbabilityInfo.mProbability); wordSet.remove(word0); final HashSet bigramWord1s = bigrams.get(word0); - for (int j = 0; j < wordProperty.mBigrams.size(); j++) { - final String word1 = wordProperty.mBigrams.get(j).mWord; + // TODO: Support ngram. + for (final WeightedString bigramTarget : wordProperty.getBigrams()) { + final String word1 = bigramTarget.mWord; assertTrue(bigramWord1s.contains(word1)); final Pair bigram = new Pair<>(word0, word1); if (canCheckBigramProbability(formatVersion)) { final int bigramProbability = bigramProbabilitiesToCheckLater.get(bigram); - assertEquals(bigramProbability, wordProperty.mBigrams.get(j).getProbability()); + assertEquals(bigramProbability, bigramTarget.getProbability()); } bigramSet.remove(bigram); } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index 406046a74..f8b68e0ce 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -682,8 +682,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } assertTrue(shortcutList.isEmpty()); } - for (int j = 0; j < wordProperty.mBigrams.size(); j++) { - final String word1 = wordProperty.mBigrams.get(j).mWord; + for (final WeightedString bigramTarget : wordProperty.getBigrams()) { + final String word1 = bigramTarget.mWord; final Pair bigram = new Pair<>(word0, word1); assertTrue(bigramSet.contains(bigram)); bigramSet.remove(bigram); diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java index 65b84d5f7..18f4bcf5f 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver2DictDecoder.java @@ -292,11 +292,11 @@ public class Ver2DictDecoder extends AbstractDictDecoder { } // Insert bigrams into the fusion dictionary. for (final WordProperty wordProperty : wordProperties) { - if (wordProperty.mBigrams == null) { + if (!wordProperty.mHasNgrams) { continue; } final String word0 = wordProperty.mWord; - for (final WeightedString bigram : wordProperty.mBigrams) { + for (final WeightedString bigram : wordProperty.getBigrams()) { fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java index 5e8417ed6..0da915a75 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictDecoder.java @@ -97,12 +97,13 @@ public class Ver4DictDecoder extends AbstractDictDecoder { } } // Insert bigrams into the fusion dictionary. + // TODO: Support ngrams. for (final WordProperty wordProperty : wordProperties) { - if (wordProperty.mBigrams == null) { + if (!wordProperty.mHasNgrams) { continue; } final String word0 = wordProperty.mWord; - for (final WeightedString bigram : wordProperty.mBigrams) { + for (final WeightedString bigram : wordProperty.getBigrams()) { fusionDict.setBigram(word0, bigram.mWord, bigram.mProbabilityInfo); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index 74da93766..401ffde6d 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/tests/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -102,8 +102,9 @@ public class Ver4DictEncoder implements DictEncoder { } } for (final WordProperty word0Property : dict) { - if (null == word0Property.mBigrams) continue; - for (final WeightedString word1 : word0Property.mBigrams) { + if (!word0Property.mHasNgrams) continue; + // TODO: Support ngram. + for (final WeightedString word1 : word0Property.getBigrams()) { final NgramContext ngramContext = new NgramContext(new NgramContext.WordInfo(word0Property.mWord)); if (!binaryDict.addNgramEntry(ngramContext, word1.mWord, diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java index 94d1ae8bb..c6818ce0c 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java @@ -135,7 +135,7 @@ public class Diff extends Dicttool.Command { hasDifferences = true; } hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, - "Bigram", word0Property.mBigrams, word1PtNode.getBigrams()); + "Bigram", word0Property.getBigrams(), word1PtNode.getBigrams()); hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord, "Shortcut", word0Property.mShortcutTargets, word1PtNode.getShortcutTargets()); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java index 9b2567fd3..2850e1ff6 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java @@ -45,8 +45,8 @@ public class Info extends Dicttool.Command { int whitelistCount = 0; for (final WordProperty wordProperty : dict) { ++wordCount; - if (null != wordProperty.mBigrams) { - bigramCount += wordProperty.mBigrams.size(); + if (wordProperty.mHasNgrams) { + bigramCount += wordProperty.mNgrams.size(); } if (null != wordProperty.mShortcutTargets) { shortcutCount += wordProperty.mShortcutTargets.size(); diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java index bdec44761..cd3ce70eb 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java @@ -353,7 +353,7 @@ public class XmlDictInputOutput { + "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability() + (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">"); - if (null != wordProperty.mShortcutTargets) { + if (wordProperty.mHasShortcuts) { destination.write("\n"); for (WeightedString target : wordProperty.mShortcutTargets) { destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\"" @@ -362,9 +362,9 @@ public class XmlDictInputOutput { } destination.write(" "); } - if (null != wordProperty.mBigrams) { + if (wordProperty.mHasNgrams) { destination.write("\n"); - for (WeightedString bigram : wordProperty.mBigrams) { + for (WeightedString bigram : wordProperty.getBigrams()) { destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\"" + bigram.getProbability() + "\">" + bigram.mWord + "\n");