Make WeightedString have ProbabilityInfo.

Bug: 11281877
Bug: 12810574
Change-Id: I265e3d8654c75766cd0e0d09d67ef62b4566298a
This commit is contained in:
Keisuke Kuroyanagi 2014-02-05 21:44:55 +09:00
parent 75a3df30f6
commit df1d3e733e
11 changed files with 80 additions and 46 deletions

View file

@ -511,7 +511,7 @@ public final class BinaryDictDecoderUtils {
final WeightedString word = getWordAtPosition(dictDecoder, headerSize,
bigram.mAddress, options);
final int reconstructedFrequency =
BinaryDictIOUtils.reconstructBigramFrequency(word.mFrequency,
BinaryDictIOUtils.reconstructBigramFrequency(word.getProbability(),
bigram.mFrequency);
bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
}
@ -618,7 +618,7 @@ public final class BinaryDictDecoderUtils {
// words that are not also registered as unigrams so we don't have to avoid
// them explicitly here.
for (final WeightedString bigram : w.mBigrams) {
newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency);
newDict.setBigram(w.mWord, bigram.mWord, bigram.getProbability());
}
}
}

View file

@ -67,29 +67,40 @@ public final class FusionDictionary implements Iterable<Word> {
}
/**
* A string with a frequency.
* A string with a probability.
*
* This represents an "attribute", that is either a bigram or a shortcut.
*/
public static final class WeightedString {
// NOTE(review): this span appears to come from a diff rendered without +/- markers, so
// each pre-change line (mFrequency-based) is directly followed by its post-change
// replacement (mProbabilityInfo-based). Read the second line of each pair as current.
public final String mWord;
public int mFrequency;
public WeightedString(String word, int frequency) {
public ProbabilityInfo mProbabilityInfo;
public WeightedString(final String word, final int probability) {
mWord = word;
mFrequency = frequency;
mProbabilityInfo = new ProbabilityInfo(probability);
}
// Returns the mProbability field of the wrapped ProbabilityInfo.
public int getProbability() {
return mProbabilityInfo.mProbability;
}
// Replaces the wrapped ProbabilityInfo with a new instance built from the probability
// alone; the previous ProbabilityInfo instance is discarded, not updated in place.
public void setProbability(final int probability) {
mProbabilityInfo = new ProbabilityInfo(probability);
}
// NOTE(review): the post-change hash mixes in mTimestamp, mLevel and mCount, whereas
// equals() below delegates to ProbabilityInfo.equals(), which compares only mProbability
// when neither side has historical info. Two instances that are equal could then hash
// differently if their level/count differ -- confirm and align the two methods.
@Override
public int hashCode() {
return Arrays.hashCode(new Object[] { mWord, mFrequency });
return Arrays.hashCode(new Object[] { mWord, mProbabilityInfo.mProbability,
mProbabilityInfo.mTimestamp, mProbabilityInfo.mLevel,
mProbabilityInfo.mCount });
}
// Two WeightedStrings are equal when their words are equal and (post-change) their
// ProbabilityInfo objects are equal according to ProbabilityInfo.equals().
@Override
public boolean equals(Object o) {
if (o == this) return true;
if (!(o instanceof WeightedString)) return false;
WeightedString w = (WeightedString)o;
return mWord.equals(w.mWord) && mFrequency == w.mFrequency;
final WeightedString w = (WeightedString)o;
return mWord.equals(w.mWord) && mProbabilityInfo.equals(w.mProbabilityInfo);
}
}
@ -200,18 +211,18 @@ public final class FusionDictionary implements Iterable<Word> {
}
/**
* Adds a word to the bigram list. Updates the frequency if the word already
* Adds a word to the bigram list. Updates the probability if the word already
* exists.
*/
public void addBigram(final String word, final int frequency) {
public void addBigram(final String word, final int probability) {
// NOTE(review): pre-change and post-change lines appear interleaved here (diff rendered
// without +/- markers); the second line of each pair is the post-change form.
// The bigram list is created lazily on first use.
if (mBigrams == null) {
mBigrams = new ArrayList<WeightedString>();
}
// Look for an existing entry for this word: update it if getBigram() returns one,
// otherwise append a new WeightedString to the list.
WeightedString bigram = getBigram(word);
if (bigram != null) {
bigram.mFrequency = frequency;
bigram.setProbability(probability);
} else {
bigram = new WeightedString(word, frequency);
bigram = new WeightedString(word, probability);
mBigrams.add(bigram);
}
}
@ -273,8 +284,8 @@ public final class FusionDictionary implements Iterable<Word> {
final WeightedString existingShortcut = getShortcut(shortcut.mWord);
if (existingShortcut == null) {
mShortcutTargets.add(shortcut);
} else if (existingShortcut.mFrequency < shortcut.mFrequency) {
existingShortcut.mFrequency = shortcut.mFrequency;
} else if (existingShortcut.getProbability() < shortcut.getProbability()) {
existingShortcut.setProbability(shortcut.getProbability());
}
}
}
@ -289,8 +300,8 @@ public final class FusionDictionary implements Iterable<Word> {
final WeightedString existingBigram = getBigram(bigram.mWord);
if (existingBigram == null) {
mBigrams.add(bigram);
} else if (existingBigram.mFrequency < bigram.mFrequency) {
existingBigram.mFrequency = bigram.mFrequency;
} else if (existingBigram.getProbability() < bigram.getProbability()) {
existingBigram.setProbability(bigram.getProbability());
}
}
}

View file

@ -17,6 +17,7 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
public final class ProbabilityInfo {
public final int mProbability;
@ -39,8 +40,24 @@ public final class ProbabilityInfo {
mCount = count;
}
/**
 * Tells whether this object carries historical information.
 *
 * @return {@code true} unless {@code mTimestamp} equals
 *         {@code BinaryDictionary.NOT_A_VALID_TIMESTAMP}.
 */
public boolean hasHistoricalInfo() {
    final boolean timestampIsUnset = (BinaryDictionary.NOT_A_VALID_TIMESTAMP == mTimestamp);
    return !timestampIsUnset;
}
/**
 * Formats the timestamp, level and count fields, in that order, separated by colons.
 * The probability is not part of the output.
 */
@Override
public String toString() {
    final StringBuilder text = new StringBuilder();
    text.append(mTimestamp).append(":").append(mLevel).append(":").append(mCount);
    return text.toString();
}
/**
 * Compares this object with another {@code ProbabilityInfo}.
 *
 * When neither side has historical info (see {@link #hasHistoricalInfo()}), only
 * {@code mProbability} is compared. Otherwise the probability, timestamp, level and
 * count must all match.
 *
 * @param o the object to compare with; may be {@code null}.
 * @return {@code true} if {@code o} is a {@code ProbabilityInfo} considered equal to this one.
 */
@Override
public boolean equals(Object o) {
    if (o == this) return true;
    if (!(o instanceof ProbabilityInfo)) return false;
    final ProbabilityInfo p = (ProbabilityInfo)o;
    // The probability has to match in both comparison modes.
    if (mProbability != p.mProbability) return false;
    if (!hasHistoricalInfo() && !p.hasHistoricalInfo()) return true;
    return mTimestamp == p.mTimestamp && mLevel == p.mLevel && mCount == p.mCount;
}

/**
 * Returns a hash code consistent with {@link #equals(Object)}.
 *
 * Without historical info only {@code mProbability} contributes, because equals()
 * ignores the other fields in that case. With historical info all four fields contribute.
 * Two equal objects always take the same branch: either both lack historical info, or
 * their timestamps are identical and therefore both have it.
 *
 * @return the hash code.
 */
@Override
public int hashCode() {
    if (!hasHistoricalInfo()) {
        return java.util.Arrays.hashCode(new Object[] { mProbability });
    }
    return java.util.Arrays.hashCode(
            new Object[] { mProbability, mTimestamp, mLevel, mCount });
}
}

View file

@ -197,7 +197,7 @@ public class Ver2DictEncoder implements DictEncoder {
final WeightedString target = shortcutIterator.next();
final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
shortcutIterator.hasNext(),
target.mFrequency);
target.getProbability());
mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags,
FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord);
@ -231,7 +231,7 @@ public class Ver2DictEncoder implements DictEncoder {
final int offset = addressOfBigram
- (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord);
offset, bigram.getProbability(), unigramFrequencyForThisWord, bigram.mWord);
mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags,
FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,

View file

@ -78,7 +78,7 @@ public class Ver4DictEncoder implements DictEncoder {
} else {
for (final WeightedString shortcutTarget : word.mShortcutTargets) {
binaryDict.addUnigramWord(word.mWord, word.mFrequency,
shortcutTarget.mWord, shortcutTarget.mFrequency,
shortcutTarget.mWord, shortcutTarget.getProbability(),
word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */);
}
}
@ -89,7 +89,7 @@ public class Ver4DictEncoder implements DictEncoder {
for (final Word word0 : dict) {
if (null == word0.mBigrams) continue;
for (final WeightedString word1 : word0.mBigrams) {
binaryDict.addBigramWords(word0.mWord, word1.mWord, word1.mFrequency,
binaryDict.addBigramWords(word0.mWord, word1.mWord, word1.getProbability(),
0 /* timestamp */);
if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
binaryDict.flushWithGC();

View file

@ -108,7 +108,7 @@ public class WordProperty {
for (int i = 0; i < mBigramTargets.size(); i++) {
builder.append(" bigram=" + mBigramTargets.get(i).mWord);
builder.append(",");
builder.append("f=" + mBigramTargets.get(i).mFrequency);
builder.append("f=" + mBigramTargets.get(i).getProbability());
if (mBigramProbabilityInfo.get(i).mTimestamp
!= BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
builder.append(",");
@ -119,7 +119,7 @@ public class WordProperty {
for (int i = 0; i < mShortcutTargets.size(); i++) {
builder.append(" shortcut=" + mShortcutTargets.get(i).mWord);
builder.append(",");
builder.append("f=" + mShortcutTargets.get(i).mFrequency);
builder.append("f=" + mShortcutTargets.get(i).getProbability());
builder.append("\n");
}
return builder.toString();

View file

@ -962,7 +962,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (int j = 0; j < unigramProperty.mBigramTargets.size(); j++) {
final String word1 = unigramProperty.mBigramTargets.get(j).mWord;
assertTrue(bigramWord1s.contains(word1));
final int probability = unigramProperty.mBigramTargets.get(j).mFrequency;
final int probability = unigramProperty.mBigramTargets.get(j).getProbability();
assertEquals((int)bigramProbabilities.get(new Pair<String, String>(word0, word1)),
probability);
assertEquals(unigramProperty.mBigramProbabilityInfo.get(j).mProbability,
@ -1053,7 +1053,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (int j = 0; j < wordProperty.mBigramTargets.size(); j++) {
final String word1 = wordProperty.mBigramTargets.get(j).mWord;
assertTrue(bigramWord1s.contains(word1));
final int probability = wordProperty.mBigramTargets.get(j).mFrequency;
final int probability = wordProperty.mBigramTargets.get(j).getProbability();
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
assertEquals((int)bigramProbabilitiesToCheckLater.get(bigram), probability);
bigramSet.remove(bigram);
@ -1087,7 +1087,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
assertEquals(1, wordProperty.mShortcutTargets.size());
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).mFrequency);
assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
final int updatedShortcutProbability = 2;
binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
@ -1096,7 +1096,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(1, wordProperty.mShortcutTargets.size());
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
assertEquals(updatedShortcutProbability,
wordProperty.mShortcutTargets.get(0).mFrequency);
wordProperty.mShortcutTargets.get(0).getProbability());
binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy",
shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
0 /* timestamp */);
@ -1107,7 +1107,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(2, wordProperty.mShortcutTargets.size());
for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency);
assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
shortcutTarget.getProbability());
shortcutTargets.remove(shortcutTarget.mWord);
}
shortcutTargets.put("zzz", updatedShortcutProbability);
@ -1117,7 +1118,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
assertEquals(2, wordProperty.mShortcutTargets.size());
for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency);
assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
shortcutTarget.getProbability());
shortcutTargets.remove(shortcutTarget.mWord);
}
}
@ -1193,7 +1195,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
final String targetCodePonts = shortcutTarget.mWord;
assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
shortcutTarget.mFrequency);
shortcutTarget.getProbability());
}
}
}

View file

@ -127,7 +127,7 @@ public class CombinedInputOutput {
if (null != word) {
dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
for (WeightedString s : bigrams) {
dict.setBigram(word, s.mWord, s.mFrequency);
dict.setBigram(word, s.mWord, s.getProbability());
}
}
if (!shortcuts.isEmpty()) shortcuts = new ArrayList<WeightedString>();
@ -185,7 +185,7 @@ public class CombinedInputOutput {
if (null != word) {
dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
for (WeightedString s : bigrams) {
dict.setBigram(word, s.mWord, s.mFrequency);
dict.setBigram(word, s.mWord, s.getProbability());
}
}
@ -222,13 +222,13 @@ public class CombinedInputOutput {
if (null != word.mShortcutTargets) {
for (WeightedString target : word.mShortcutTargets) {
destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + ","
+ FREQUENCY_TAG + "=" + target.mFrequency + "\n");
+ FREQUENCY_TAG + "=" + target.getProbability() + "\n");
}
}
if (null != word.mBigrams) {
for (WeightedString bigram : word.mBigrams) {
destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + ","
+ FREQUENCY_TAG + "=" + bigram.mFrequency + "\n");
+ FREQUENCY_TAG + "=" + bigram.getProbability() + "\n");
}
}
}

View file

@ -159,7 +159,7 @@ public class Diff extends Dicttool.Command {
if (null == list0) return false;
for (final WeightedString attribute0 : list0) {
System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
+ attribute0.mFrequency);
+ attribute0.getProbability());
}
return true;
}
@ -175,8 +175,8 @@ public class Diff extends Dicttool.Command {
for (final WeightedString attribute1 : list1) {
if (attribute0.mWord.equals(attribute1.mWord)) {
System.out.println(type + " freq changed: " + word + " "
+ attribute0.mWord + " " + attribute0.mFrequency + " -> "
+ attribute1.mFrequency);
+ attribute0.mWord + " " + attribute0.getProbability() + " -> "
+ attribute1.getProbability());
list1.remove(attribute1);
foundString = true;
break;
@ -185,7 +185,7 @@ public class Diff extends Dicttool.Command {
if (!foundString) {
// We come here if we haven't found any matching string.
System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
+ attribute0.mFrequency);
+ attribute0.getProbability());
}
} else {
list1.remove(attribute0);
@ -197,7 +197,7 @@ public class Diff extends Dicttool.Command {
for (final WeightedString attribute1 : list1) {
hasDifferences = true;
System.out.println(type + " added: " + word + " " + attribute1.mWord + " "
+ attribute1.mFrequency);
+ attribute1.getProbability());
}
return hasDifferences;
}

View file

@ -51,7 +51,8 @@ public class Info extends Dicttool.Command {
if (null != w.mShortcutTargets) {
shortcutCount += w.mShortcutTargets.size();
for (WeightedString shortcutTarget : w.mShortcutTargets) {
if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.mFrequency) {
if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
== shortcutTarget.getProbability()) {
++whitelistCount;
}
}
@ -84,8 +85,9 @@ public class Info extends Dicttool.Command {
} else {
for (final WeightedString shortcutTarget : shortcutTargets) {
System.out.println(" Shortcut target: " + shortcutTarget.mWord + " ("
+ (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.mFrequency
? "whitelist" : shortcutTarget.mFrequency) + ")");
+ (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
== shortcutTarget.getProbability() ?
"whitelist" : shortcutTarget.getProbability()) + ")");
}
}
final ArrayList<WeightedString> bigrams = ptNode.getBigrams();
@ -93,7 +95,8 @@ public class Info extends Dicttool.Command {
System.out.println(" No bigrams");
} else {
for (final WeightedString bigram : bigrams) {
System.out.println(" Bigram: " + bigram.mWord + " (" + bigram.mFrequency + ")");
System.out.println(
" Bigram: " + bigram.mWord + " (" + bigram.getProbability() + ")");
}
}
}

View file

@ -319,7 +319,7 @@ public class XmlDictInputOutput {
final ArrayList<WeightedString> bigramList = bigramMap.get(firstWord);
for (final WeightedString bigram : bigramList) {
if (!dict.hasWord(bigram.mWord)) continue;
dict.setBigram(firstWord, bigram.mWord, bigram.mFrequency);
dict.setBigram(firstWord, bigram.mWord, bigram.getProbability());
}
}
return dict;
@ -369,7 +369,7 @@ public class XmlDictInputOutput {
destination.write("\n");
for (WeightedString target : word.mShortcutTargets) {
destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\""
+ target.mFrequency + "\">" + target.mWord + "</" + SHORTCUT_TAG
+ target.getProbability() + "\">" + target.mWord + "</" + SHORTCUT_TAG
+ ">\n");
}
destination.write(" ");
@ -378,7 +378,8 @@ public class XmlDictInputOutput {
destination.write("\n");
for (WeightedString bigram : word.mBigrams) {
destination.write(" <" + BIGRAM_TAG + " " + FREQUENCY_ATTR + "=\""
+ bigram.mFrequency + "\">" + bigram.mWord + "</" + BIGRAM_TAG + ">\n");
+ bigram.getProbability() + "\">" + bigram.mWord
+ "</" + BIGRAM_TAG + ">\n");
}
destination.write(" ");
}