From df1d3e733e2b000c776e74b54d3c62f0d433b013 Mon Sep 17 00:00:00 2001 From: Keisuke Kuroyanagi Date: Wed, 5 Feb 2014 21:44:55 +0900 Subject: [PATCH] Make WeightedString have ProbabilityInfo. Bug: 11281877 Bug: 12810574 Change-Id: I265e3d8654c75766cd0e0d09d67ef62b4566298a --- .../makedict/BinaryDictDecoderUtils.java | 4 +- .../latin/makedict/FusionDictionary.java | 41 ++++++++++++------- .../latin/makedict/ProbabilityInfo.java | 17 ++++++++ .../latin/makedict/Ver2DictEncoder.java | 4 +- .../latin/makedict/Ver4DictEncoder.java | 4 +- .../inputmethod/latin/utils/WordProperty.java | 4 +- .../latin/BinaryDictionaryTests.java | 16 ++++---- .../latin/dicttool/CombinedInputOutput.java | 8 ++-- .../inputmethod/latin/dicttool/Diff.java | 10 ++--- .../inputmethod/latin/dicttool/Info.java | 11 +++-- .../latin/dicttool/XmlDictInputOutput.java | 7 ++-- 11 files changed, 80 insertions(+), 46 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java index 369184573..9f2345962 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -511,7 +511,7 @@ public final class BinaryDictDecoderUtils { final WeightedString word = getWordAtPosition(dictDecoder, headerSize, bigram.mAddress, options); final int reconstructedFrequency = - BinaryDictIOUtils.reconstructBigramFrequency(word.mFrequency, + BinaryDictIOUtils.reconstructBigramFrequency(word.getProbability(), bigram.mFrequency); bigrams.add(new WeightedString(word.mWord, reconstructedFrequency)); } @@ -618,7 +618,7 @@ public final class BinaryDictDecoderUtils { // words that are not also registered as unigrams so we don't have to avoid // them explicitly here. for (final WeightedString bigram : w.mBigrams) { - newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency); + newDict.setBigram(w.mWord, bigram.mWord, bigram.getProbability()); } } } diff --git a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java index 5b0e8399a..ef23acb71 100644 --- a/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java +++ b/java/src/com/android/inputmethod/latin/makedict/FusionDictionary.java @@ -67,29 +67,40 @@ public final class FusionDictionary implements Iterable { } /** - * A string with a frequency. + * A string with a probability. * * This represents an "attribute", that is either a bigram or a shortcut. */ public static final class WeightedString { public final String mWord; - public int mFrequency; - public WeightedString(String word, int frequency) { + public ProbabilityInfo mProbabilityInfo; + + public WeightedString(final String word, final int probability) { mWord = word; - mFrequency = frequency; + mProbabilityInfo = new ProbabilityInfo(probability); + } + + public int getProbability() { + return mProbabilityInfo.mProbability; + } + + public void setProbability(final int probability) { + mProbabilityInfo = new ProbabilityInfo(probability); } @Override public int hashCode() { - return Arrays.hashCode(new Object[] { mWord, mFrequency }); + return Arrays.hashCode(new Object[] { mWord, mProbabilityInfo.mProbability, + mProbabilityInfo.mTimestamp, mProbabilityInfo.mLevel, + mProbabilityInfo.mCount }); } @Override public boolean equals(Object o) { if (o == this) return true; if (!(o instanceof WeightedString)) return false; - WeightedString w = (WeightedString)o; - return mWord.equals(w.mWord) && mFrequency == w.mFrequency; + final WeightedString w = (WeightedString)o; + return mWord.equals(w.mWord) && mProbabilityInfo.equals(w.mProbabilityInfo); } } @@ -200,18 +211,18 @@ public final class FusionDictionary implements Iterable { } /** - * Adds a word to the bigram list. Updates the frequency if the word already + * Adds a word to the bigram list. Updates the probability if the word already * exists. */ - public void addBigram(final String word, final int frequency) { + public void addBigram(final String word, final int probability) { if (mBigrams == null) { mBigrams = new ArrayList(); } WeightedString bigram = getBigram(word); if (bigram != null) { - bigram.mFrequency = frequency; + bigram.setProbability(probability); } else { - bigram = new WeightedString(word, frequency); + bigram = new WeightedString(word, probability); mBigrams.add(bigram); } } @@ -273,8 +284,8 @@ public final class FusionDictionary implements Iterable { final WeightedString existingShortcut = getShortcut(shortcut.mWord); if (existingShortcut == null) { mShortcutTargets.add(shortcut); - } else if (existingShortcut.mFrequency < shortcut.mFrequency) { - existingShortcut.mFrequency = shortcut.mFrequency; + } else if (existingShortcut.getProbability() < shortcut.getProbability()) { + existingShortcut.setProbability(shortcut.getProbability()); } } } @@ -289,8 +300,8 @@ public final class FusionDictionary implements Iterable { final WeightedString existingBigram = getBigram(bigram.mWord); if (existingBigram == null) { mBigrams.add(bigram); - } else if (existingBigram.mFrequency < bigram.mFrequency) { - existingBigram.mFrequency = bigram.mFrequency; + } else if (existingBigram.getProbability() < bigram.getProbability()) { + existingBigram.setProbability(bigram.getProbability()); } } } diff --git a/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java b/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java index c1a43cedf..79f924cc6 100644 --- a/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java +++ b/java/src/com/android/inputmethod/latin/makedict/ProbabilityInfo.java @@ -17,6 +17,7 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.latin.BinaryDictionary; +import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; public final class ProbabilityInfo { public final int mProbability; @@ -39,8 +40,24 @@ public final class ProbabilityInfo { mCount = count; } + public boolean hasHistoricalInfo() { + return mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP; + } + @Override public String toString() { return mTimestamp + ":" + mLevel + ":" + mCount; } + + @Override + public boolean equals(Object o) { + if (o == this) return true; + if (!(o instanceof ProbabilityInfo)) return false; + final ProbabilityInfo p = (ProbabilityInfo)o; + if (!hasHistoricalInfo() && !p.hasHistoricalInfo()) { + return mProbability == p.mProbability; + } + return mProbability == p.mProbability && mTimestamp == p.mTimestamp && mLevel == p.mLevel + && mCount == p.mCount; + } } \ No newline at end of file diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java index a3a6c2c34..3de083ef3 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver2DictEncoder.java @@ -197,7 +197,7 @@ public class Ver2DictEncoder implements DictEncoder { final WeightedString target = shortcutIterator.next(); final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags( shortcutIterator.hasNext(), - target.mFrequency); + target.getProbability()); mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags, FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord); @@ -231,7 +231,7 @@ public class Ver2DictEncoder implements DictEncoder { final int offset = addressOfBigram - (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(), - offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord); + offset, bigram.getProbability(), unigramFrequencyForThisWord, bigram.mWord); mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags, FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE); mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition, diff --git a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java index 160775d63..a5a613810 100644 --- a/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/Ver4DictEncoder.java @@ -78,7 +78,7 @@ public class Ver4DictEncoder implements DictEncoder { } else { for (final WeightedString shortcutTarget : word.mShortcutTargets) { binaryDict.addUnigramWord(word.mWord, word.mFrequency, - shortcutTarget.mWord, shortcutTarget.mFrequency, + shortcutTarget.mWord, shortcutTarget.getProbability(), word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */); } } @@ -89,7 +89,7 @@ public class Ver4DictEncoder implements DictEncoder { for (final Word word0 : dict) { if (null == word0.mBigrams) continue; for (final WeightedString word1 : word0.mBigrams) { - binaryDict.addBigramWords(word0.mWord, word1.mWord, word1.mFrequency, + binaryDict.addBigramWords(word0.mWord, word1.mWord, word1.getProbability(), 0 /* timestamp */); if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) { binaryDict.flushWithGC(); diff --git a/java/src/com/android/inputmethod/latin/utils/WordProperty.java b/java/src/com/android/inputmethod/latin/utils/WordProperty.java index da56b213f..37d1102e3 100644 --- a/java/src/com/android/inputmethod/latin/utils/WordProperty.java +++ b/java/src/com/android/inputmethod/latin/utils/WordProperty.java @@ -108,7 +108,7 @@ public class WordProperty { for (int i = 0; i < mBigramTargets.size(); i++) { builder.append(" bigram=" + mBigramTargets.get(i).mWord); builder.append(","); - builder.append("f=" + mBigramTargets.get(i).mFrequency); + builder.append("f=" + mBigramTargets.get(i).getProbability()); if (mBigramProbabilityInfo.get(i).mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP) { builder.append(","); @@ -119,7 +119,7 @@ public class WordProperty { for (int i = 0; i < mShortcutTargets.size(); i++) { builder.append(" shortcut=" + mShortcutTargets.get(i).mWord); builder.append(","); - builder.append("f=" + mShortcutTargets.get(i).mFrequency); + builder.append("f=" + mShortcutTargets.get(i).getProbability()); builder.append("\n"); } return builder.toString(); diff --git a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java index bab86e546..3e42f3423 100644 --- a/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java +++ b/tests/src/com/android/inputmethod/latin/BinaryDictionaryTests.java @@ -962,7 +962,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { for (int j = 0; j < unigramProperty.mBigramTargets.size(); j++) { final String word1 = unigramProperty.mBigramTargets.get(j).mWord; assertTrue(bigramWord1s.contains(word1)); - final int probability = unigramProperty.mBigramTargets.get(j).mFrequency; + final int probability = unigramProperty.mBigramTargets.get(j).getProbability(); assertEquals((int)bigramProbabilities.get(new Pair(word0, word1)), probability); assertEquals(unigramProperty.mBigramProbabilityInfo.get(j).mProbability, @@ -1053,7 +1053,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { for (int j = 0; j < wordProperty.mBigramTargets.size(); j++) { final String word1 = wordProperty.mBigramTargets.get(j).mWord; assertTrue(bigramWord1s.contains(word1)); - final int probability = wordProperty.mBigramTargets.get(j).mFrequency; + final int probability = wordProperty.mBigramTargets.get(j).getProbability(); final Pair bigram = new Pair(word0, word1); assertEquals((int)bigramProbabilitiesToCheckLater.get(bigram), probability); bigramSet.remove(bigram); @@ -1087,7 +1087,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { WordProperty wordProperty = binaryDictionary.getWordProperty("aaa"); assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); - assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).mFrequency); + assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability()); final int updatedShortcutProbability = 2; binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz", updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, @@ -1096,7 +1096,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(1, wordProperty.mShortcutTargets.size()); assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord); assertEquals(updatedShortcutProbability, - wordProperty.mShortcutTargets.get(0).mFrequency); + wordProperty.mShortcutTargets.get(0).getProbability()); binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy", shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */, 0 /* timestamp */); @@ -1107,7 +1107,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(2, wordProperty.mShortcutTargets.size()); for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); - assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency); + assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), + shortcutTarget.getProbability()); shortcutTargets.remove(shortcutTarget.mWord); } shortcutTargets.put("zzz", updatedShortcutProbability); @@ -1117,7 +1118,8 @@ public class BinaryDictionaryTests extends AndroidTestCase { assertEquals(2, wordProperty.mShortcutTargets.size()); for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) { assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord)); - assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency); + assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), + shortcutTarget.getProbability()); shortcutTargets.remove(shortcutTarget.mWord); } } @@ -1193,7 +1195,7 @@ public class BinaryDictionaryTests extends AndroidTestCase { for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) { final String targetCodePonts = shortcutTarget.mWord; assertEquals((int)shortcutTargets.get(word).get(targetCodePonts), - shortcutTarget.mFrequency); + shortcutTarget.getProbability()); } } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java index 16f82dafd..b9840607a 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/CombinedInputOutput.java @@ -127,7 +127,7 @@ public class CombinedInputOutput { if (null != word) { dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); for (WeightedString s : bigrams) { - dict.setBigram(word, s.mWord, s.mFrequency); + dict.setBigram(word, s.mWord, s.getProbability()); } } if (!shortcuts.isEmpty()) shortcuts = new ArrayList(); @@ -185,7 +185,7 @@ public class CombinedInputOutput { if (null != word) { dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord); for (WeightedString s : bigrams) { - dict.setBigram(word, s.mWord, s.mFrequency); + dict.setBigram(word, s.mWord, s.getProbability()); } } @@ -222,13 +222,13 @@ public class CombinedInputOutput { if (null != word.mShortcutTargets) { for (WeightedString target : word.mShortcutTargets) { destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + "," - + FREQUENCY_TAG + "=" + target.mFrequency + "\n"); + + FREQUENCY_TAG + "=" + target.getProbability() + "\n"); } } if (null != word.mBigrams) { for (WeightedString bigram : word.mBigrams) { destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + "," - + FREQUENCY_TAG + "=" + bigram.mFrequency + "\n"); + + FREQUENCY_TAG + "=" + bigram.getProbability() + "\n"); } } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java index 7ac3c67a1..c9f6bd508 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Diff.java @@ -159,7 +159,7 @@ public class Diff extends Dicttool.Command { if (null == list0) return false; for (final WeightedString attribute0 : list0) { System.out.println(type + " removed: " + word + " " + attribute0.mWord + " " - + attribute0.mFrequency); + + attribute0.getProbability()); } return true; } @@ -175,8 +175,8 @@ public class Diff extends Dicttool.Command { for (final WeightedString attribute1 : list1) { if (attribute0.mWord.equals(attribute1.mWord)) { System.out.println(type + " freq changed: " + word + " " - + attribute0.mWord + " " + attribute0.mFrequency + " -> " - + attribute1.mFrequency); + + attribute0.mWord + " " + attribute0.getProbability() + " -> " + + attribute1.getProbability()); list1.remove(attribute1); foundString = true; break; @@ -185,7 +185,7 @@ public class Diff extends Dicttool.Command { if (!foundString) { // We come here if we haven't found any matching string. System.out.println(type + " removed: " + word + " " + attribute0.mWord + " " - + attribute0.mFrequency); + + attribute0.getProbability()); } } else { list1.remove(attribute0); @@ -197,7 +197,7 @@ public class Diff extends Dicttool.Command { for (final WeightedString attribute1 : list1) { hasDifferences = true; System.out.println(type + " added: " + word + " " + attribute1.mWord + " " - + attribute1.mFrequency); + + attribute1.getProbability()); } return hasDifferences; } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java index 350f42772..8f17fcd94 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/Info.java @@ -51,7 +51,8 @@ public class Info extends Dicttool.Command { if (null != w.mShortcutTargets) { shortcutCount += w.mShortcutTargets.size(); for (WeightedString shortcutTarget : w.mShortcutTargets) { - if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.mFrequency) { + if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY + == shortcutTarget.getProbability()) { ++whitelistCount; } } @@ -84,8 +85,9 @@ public class Info extends Dicttool.Command { } else { for (final WeightedString shortcutTarget : shortcutTargets) { System.out.println(" Shortcut target: " + shortcutTarget.mWord + " (" - + (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.mFrequency - ? "whitelist" : shortcutTarget.mFrequency) + ")"); + + (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY + == shortcutTarget.getProbability() ? + "whitelist" : shortcutTarget.getProbability()) + ")"); } } final ArrayList bigrams = ptNode.getBigrams(); @@ -93,7 +95,8 @@ public class Info extends Dicttool.Command { System.out.println(" No bigrams"); } else { for (final WeightedString bigram : bigrams) { - System.out.println(" Bigram: " + bigram.mWord + " (" + bigram.mFrequency + ")"); + System.out.println( + " Bigram: " + bigram.mWord + " (" + bigram.getProbability() + ")"); } } } diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java index d226251c1..cdc487b16 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/XmlDictInputOutput.java @@ -319,7 +319,7 @@ public class XmlDictInputOutput { final ArrayList bigramList = bigramMap.get(firstWord); for (final WeightedString bigram : bigramList) { if (!dict.hasWord(bigram.mWord)) continue; - dict.setBigram(firstWord, bigram.mWord, bigram.mFrequency); + dict.setBigram(firstWord, bigram.mWord, bigram.getProbability()); } } return dict; @@ -369,7 +369,7 @@ public class XmlDictInputOutput { destination.write("\n"); for (WeightedString target : word.mShortcutTargets) { destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\"" - + target.mFrequency + "\">" + target.mWord + "" + target.mWord + "\n"); } destination.write(" "); @@ -378,7 +378,8 @@ public class XmlDictInputOutput { destination.write("\n"); for (WeightedString bigram : word.mBigrams) { destination.write(" <" + BIGRAM_TAG + " " + FREQUENCY_ATTR + "=\"" - + bigram.mFrequency + "\">" + bigram.mWord + "\n"); + + bigram.getProbability() + "\">" + bigram.mWord + + "\n"); } destination.write(" "); }