Make WeightedString have ProbabilityInfo.
Bug: 11281877
Bug: 12810574
Change-Id: I265e3d8654c75766cd0e0d09d67ef62b4566298a
main
parent
75a3df30f6
commit
df1d3e733e
|
@ -511,7 +511,7 @@ public final class BinaryDictDecoderUtils {
|
||||||
final WeightedString word = getWordAtPosition(dictDecoder, headerSize,
|
final WeightedString word = getWordAtPosition(dictDecoder, headerSize,
|
||||||
bigram.mAddress, options);
|
bigram.mAddress, options);
|
||||||
final int reconstructedFrequency =
|
final int reconstructedFrequency =
|
||||||
BinaryDictIOUtils.reconstructBigramFrequency(word.mFrequency,
|
BinaryDictIOUtils.reconstructBigramFrequency(word.getProbability(),
|
||||||
bigram.mFrequency);
|
bigram.mFrequency);
|
||||||
bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
|
bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
|
||||||
}
|
}
|
||||||
|
@ -618,7 +618,7 @@ public final class BinaryDictDecoderUtils {
|
||||||
// words that are not also registered as unigrams so we don't have to avoid
|
// words that are not also registered as unigrams so we don't have to avoid
|
||||||
// them explicitly here.
|
// them explicitly here.
|
||||||
for (final WeightedString bigram : w.mBigrams) {
|
for (final WeightedString bigram : w.mBigrams) {
|
||||||
newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency);
|
newDict.setBigram(w.mWord, bigram.mWord, bigram.getProbability());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -67,29 +67,40 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A string with a frequency.
|
* A string with a probability.
|
||||||
*
|
*
|
||||||
* This represents an "attribute", that is either a bigram or a shortcut.
|
* This represents an "attribute", that is either a bigram or a shortcut.
|
||||||
*/
|
*/
|
||||||
public static final class WeightedString {
|
public static final class WeightedString {
|
||||||
public final String mWord;
|
public final String mWord;
|
||||||
public int mFrequency;
|
public ProbabilityInfo mProbabilityInfo;
|
||||||
public WeightedString(String word, int frequency) {
|
|
||||||
|
public WeightedString(final String word, final int probability) {
|
||||||
mWord = word;
|
mWord = word;
|
||||||
mFrequency = frequency;
|
mProbabilityInfo = new ProbabilityInfo(probability);
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getProbability() {
|
||||||
|
return mProbabilityInfo.mProbability;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setProbability(final int probability) {
|
||||||
|
mProbabilityInfo = new ProbabilityInfo(probability);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Arrays.hashCode(new Object[] { mWord, mFrequency });
|
return Arrays.hashCode(new Object[] { mWord, mProbabilityInfo.mProbability,
|
||||||
|
mProbabilityInfo.mTimestamp, mProbabilityInfo.mLevel,
|
||||||
|
mProbabilityInfo.mCount });
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public boolean equals(Object o) {
|
public boolean equals(Object o) {
|
||||||
if (o == this) return true;
|
if (o == this) return true;
|
||||||
if (!(o instanceof WeightedString)) return false;
|
if (!(o instanceof WeightedString)) return false;
|
||||||
WeightedString w = (WeightedString)o;
|
final WeightedString w = (WeightedString)o;
|
||||||
return mWord.equals(w.mWord) && mFrequency == w.mFrequency;
|
return mWord.equals(w.mWord) && mProbabilityInfo.equals(w.mProbabilityInfo);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -200,18 +211,18 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Adds a word to the bigram list. Updates the frequency if the word already
|
* Adds a word to the bigram list. Updates the probability if the word already
|
||||||
* exists.
|
* exists.
|
||||||
*/
|
*/
|
||||||
public void addBigram(final String word, final int frequency) {
|
public void addBigram(final String word, final int probability) {
|
||||||
if (mBigrams == null) {
|
if (mBigrams == null) {
|
||||||
mBigrams = new ArrayList<WeightedString>();
|
mBigrams = new ArrayList<WeightedString>();
|
||||||
}
|
}
|
||||||
WeightedString bigram = getBigram(word);
|
WeightedString bigram = getBigram(word);
|
||||||
if (bigram != null) {
|
if (bigram != null) {
|
||||||
bigram.mFrequency = frequency;
|
bigram.setProbability(probability);
|
||||||
} else {
|
} else {
|
||||||
bigram = new WeightedString(word, frequency);
|
bigram = new WeightedString(word, probability);
|
||||||
mBigrams.add(bigram);
|
mBigrams.add(bigram);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -273,8 +284,8 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
final WeightedString existingShortcut = getShortcut(shortcut.mWord);
|
final WeightedString existingShortcut = getShortcut(shortcut.mWord);
|
||||||
if (existingShortcut == null) {
|
if (existingShortcut == null) {
|
||||||
mShortcutTargets.add(shortcut);
|
mShortcutTargets.add(shortcut);
|
||||||
} else if (existingShortcut.mFrequency < shortcut.mFrequency) {
|
} else if (existingShortcut.getProbability() < shortcut.getProbability()) {
|
||||||
existingShortcut.mFrequency = shortcut.mFrequency;
|
existingShortcut.setProbability(shortcut.getProbability());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -289,8 +300,8 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
final WeightedString existingBigram = getBigram(bigram.mWord);
|
final WeightedString existingBigram = getBigram(bigram.mWord);
|
||||||
if (existingBigram == null) {
|
if (existingBigram == null) {
|
||||||
mBigrams.add(bigram);
|
mBigrams.add(bigram);
|
||||||
} else if (existingBigram.mFrequency < bigram.mFrequency) {
|
} else if (existingBigram.getProbability() < bigram.getProbability()) {
|
||||||
existingBigram.mFrequency = bigram.mFrequency;
|
existingBigram.setProbability(bigram.getProbability());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,6 +17,7 @@
|
||||||
package com.android.inputmethod.latin.makedict;
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.BinaryDictionary;
|
import com.android.inputmethod.latin.BinaryDictionary;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
|
||||||
public final class ProbabilityInfo {
|
public final class ProbabilityInfo {
|
||||||
public final int mProbability;
|
public final int mProbability;
|
||||||
|
@ -39,8 +40,24 @@ public final class ProbabilityInfo {
|
||||||
mCount = count;
|
mCount = count;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public boolean hasHistoricalInfo() {
|
||||||
|
return mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return mTimestamp + ":" + mLevel + ":" + mCount;
|
return mTimestamp + ":" + mLevel + ":" + mCount;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (o == this) return true;
|
||||||
|
if (!(o instanceof ProbabilityInfo)) return false;
|
||||||
|
final ProbabilityInfo p = (ProbabilityInfo)o;
|
||||||
|
if (!hasHistoricalInfo() && !p.hasHistoricalInfo()) {
|
||||||
|
return mProbability == p.mProbability;
|
||||||
|
}
|
||||||
|
return mProbability == p.mProbability && mTimestamp == p.mTimestamp && mLevel == p.mLevel
|
||||||
|
&& mCount == p.mCount;
|
||||||
|
}
|
||||||
}
|
}
|
|
@ -197,7 +197,7 @@ public class Ver2DictEncoder implements DictEncoder {
|
||||||
final WeightedString target = shortcutIterator.next();
|
final WeightedString target = shortcutIterator.next();
|
||||||
final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
|
final int shortcutFlags = BinaryDictEncoderUtils.makeShortcutFlags(
|
||||||
shortcutIterator.hasNext(),
|
shortcutIterator.hasNext(),
|
||||||
target.mFrequency);
|
target.getProbability());
|
||||||
mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags,
|
mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, shortcutFlags,
|
||||||
FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
|
FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
|
||||||
final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord);
|
final int shortcutShift = CharEncoding.writeString(mBuffer, mPosition, target.mWord);
|
||||||
|
@ -231,7 +231,7 @@ public class Ver2DictEncoder implements DictEncoder {
|
||||||
final int offset = addressOfBigram
|
final int offset = addressOfBigram
|
||||||
- (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
|
- (mPosition + FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
|
||||||
final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
|
final int bigramFlags = BinaryDictEncoderUtils.makeBigramFlags(bigramIterator.hasNext(),
|
||||||
offset, bigram.mFrequency, unigramFrequencyForThisWord, bigram.mWord);
|
offset, bigram.getProbability(), unigramFrequencyForThisWord, bigram.mWord);
|
||||||
mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags,
|
mPosition = BinaryDictEncoderUtils.writeUIntToBuffer(mBuffer, mPosition, bigramFlags,
|
||||||
FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
|
FormatSpec.PTNODE_ATTRIBUTE_FLAGS_SIZE);
|
||||||
mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
|
mPosition += BinaryDictEncoderUtils.writeChildrenPosition(mBuffer, mPosition,
|
||||||
|
|
|
@ -78,7 +78,7 @@ public class Ver4DictEncoder implements DictEncoder {
|
||||||
} else {
|
} else {
|
||||||
for (final WeightedString shortcutTarget : word.mShortcutTargets) {
|
for (final WeightedString shortcutTarget : word.mShortcutTargets) {
|
||||||
binaryDict.addUnigramWord(word.mWord, word.mFrequency,
|
binaryDict.addUnigramWord(word.mWord, word.mFrequency,
|
||||||
shortcutTarget.mWord, shortcutTarget.mFrequency,
|
shortcutTarget.mWord, shortcutTarget.getProbability(),
|
||||||
word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */);
|
word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -89,7 +89,7 @@ public class Ver4DictEncoder implements DictEncoder {
|
||||||
for (final Word word0 : dict) {
|
for (final Word word0 : dict) {
|
||||||
if (null == word0.mBigrams) continue;
|
if (null == word0.mBigrams) continue;
|
||||||
for (final WeightedString word1 : word0.mBigrams) {
|
for (final WeightedString word1 : word0.mBigrams) {
|
||||||
binaryDict.addBigramWords(word0.mWord, word1.mWord, word1.mFrequency,
|
binaryDict.addBigramWords(word0.mWord, word1.mWord, word1.getProbability(),
|
||||||
0 /* timestamp */);
|
0 /* timestamp */);
|
||||||
if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
|
if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
binaryDict.flushWithGC();
|
binaryDict.flushWithGC();
|
||||||
|
|
|
@ -108,7 +108,7 @@ public class WordProperty {
|
||||||
for (int i = 0; i < mBigramTargets.size(); i++) {
|
for (int i = 0; i < mBigramTargets.size(); i++) {
|
||||||
builder.append(" bigram=" + mBigramTargets.get(i).mWord);
|
builder.append(" bigram=" + mBigramTargets.get(i).mWord);
|
||||||
builder.append(",");
|
builder.append(",");
|
||||||
builder.append("f=" + mBigramTargets.get(i).mFrequency);
|
builder.append("f=" + mBigramTargets.get(i).getProbability());
|
||||||
if (mBigramProbabilityInfo.get(i).mTimestamp
|
if (mBigramProbabilityInfo.get(i).mTimestamp
|
||||||
!= BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
|
!= BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
|
||||||
builder.append(",");
|
builder.append(",");
|
||||||
|
@ -119,7 +119,7 @@ public class WordProperty {
|
||||||
for (int i = 0; i < mShortcutTargets.size(); i++) {
|
for (int i = 0; i < mShortcutTargets.size(); i++) {
|
||||||
builder.append(" shortcut=" + mShortcutTargets.get(i).mWord);
|
builder.append(" shortcut=" + mShortcutTargets.get(i).mWord);
|
||||||
builder.append(",");
|
builder.append(",");
|
||||||
builder.append("f=" + mShortcutTargets.get(i).mFrequency);
|
builder.append("f=" + mShortcutTargets.get(i).getProbability());
|
||||||
builder.append("\n");
|
builder.append("\n");
|
||||||
}
|
}
|
||||||
return builder.toString();
|
return builder.toString();
|
||||||
|
|
|
@ -962,7 +962,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
for (int j = 0; j < unigramProperty.mBigramTargets.size(); j++) {
|
for (int j = 0; j < unigramProperty.mBigramTargets.size(); j++) {
|
||||||
final String word1 = unigramProperty.mBigramTargets.get(j).mWord;
|
final String word1 = unigramProperty.mBigramTargets.get(j).mWord;
|
||||||
assertTrue(bigramWord1s.contains(word1));
|
assertTrue(bigramWord1s.contains(word1));
|
||||||
final int probability = unigramProperty.mBigramTargets.get(j).mFrequency;
|
final int probability = unigramProperty.mBigramTargets.get(j).getProbability();
|
||||||
assertEquals((int)bigramProbabilities.get(new Pair<String, String>(word0, word1)),
|
assertEquals((int)bigramProbabilities.get(new Pair<String, String>(word0, word1)),
|
||||||
probability);
|
probability);
|
||||||
assertEquals(unigramProperty.mBigramProbabilityInfo.get(j).mProbability,
|
assertEquals(unigramProperty.mBigramProbabilityInfo.get(j).mProbability,
|
||||||
|
@ -1053,7 +1053,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
for (int j = 0; j < wordProperty.mBigramTargets.size(); j++) {
|
for (int j = 0; j < wordProperty.mBigramTargets.size(); j++) {
|
||||||
final String word1 = wordProperty.mBigramTargets.get(j).mWord;
|
final String word1 = wordProperty.mBigramTargets.get(j).mWord;
|
||||||
assertTrue(bigramWord1s.contains(word1));
|
assertTrue(bigramWord1s.contains(word1));
|
||||||
final int probability = wordProperty.mBigramTargets.get(j).mFrequency;
|
final int probability = wordProperty.mBigramTargets.get(j).getProbability();
|
||||||
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
||||||
assertEquals((int)bigramProbabilitiesToCheckLater.get(bigram), probability);
|
assertEquals((int)bigramProbabilitiesToCheckLater.get(bigram), probability);
|
||||||
bigramSet.remove(bigram);
|
bigramSet.remove(bigram);
|
||||||
|
@ -1087,7 +1087,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
|
WordProperty wordProperty = binaryDictionary.getWordProperty("aaa");
|
||||||
assertEquals(1, wordProperty.mShortcutTargets.size());
|
assertEquals(1, wordProperty.mShortcutTargets.size());
|
||||||
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
|
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
|
||||||
assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).mFrequency);
|
assertEquals(shortcutProbability, wordProperty.mShortcutTargets.get(0).getProbability());
|
||||||
final int updatedShortcutProbability = 2;
|
final int updatedShortcutProbability = 2;
|
||||||
binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
|
binaryDictionary.addUnigramWord("aaa", unigramProbability, "zzz",
|
||||||
updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
|
updatedShortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
|
||||||
|
@ -1096,7 +1096,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertEquals(1, wordProperty.mShortcutTargets.size());
|
assertEquals(1, wordProperty.mShortcutTargets.size());
|
||||||
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
|
assertEquals("zzz", wordProperty.mShortcutTargets.get(0).mWord);
|
||||||
assertEquals(updatedShortcutProbability,
|
assertEquals(updatedShortcutProbability,
|
||||||
wordProperty.mShortcutTargets.get(0).mFrequency);
|
wordProperty.mShortcutTargets.get(0).getProbability());
|
||||||
binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy",
|
binaryDictionary.addUnigramWord("aaa", unigramProbability, "yyy",
|
||||||
shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
|
shortcutProbability, false /* isNotAWord */, false /* isBlacklisted */,
|
||||||
0 /* timestamp */);
|
0 /* timestamp */);
|
||||||
|
@ -1107,7 +1107,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertEquals(2, wordProperty.mShortcutTargets.size());
|
assertEquals(2, wordProperty.mShortcutTargets.size());
|
||||||
for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
|
for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
|
||||||
assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
|
assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
|
||||||
assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency);
|
assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
|
||||||
|
shortcutTarget.getProbability());
|
||||||
shortcutTargets.remove(shortcutTarget.mWord);
|
shortcutTargets.remove(shortcutTarget.mWord);
|
||||||
}
|
}
|
||||||
shortcutTargets.put("zzz", updatedShortcutProbability);
|
shortcutTargets.put("zzz", updatedShortcutProbability);
|
||||||
|
@ -1117,7 +1118,8 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
assertEquals(2, wordProperty.mShortcutTargets.size());
|
assertEquals(2, wordProperty.mShortcutTargets.size());
|
||||||
for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
|
for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
|
||||||
assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
|
assertTrue(shortcutTargets.containsKey(shortcutTarget.mWord));
|
||||||
assertEquals((int)shortcutTargets.get(shortcutTarget.mWord), shortcutTarget.mFrequency);
|
assertEquals((int)shortcutTargets.get(shortcutTarget.mWord),
|
||||||
|
shortcutTarget.getProbability());
|
||||||
shortcutTargets.remove(shortcutTarget.mWord);
|
shortcutTargets.remove(shortcutTarget.mWord);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1193,7 +1195,7 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
|
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
|
||||||
final String targetCodePonts = shortcutTarget.mWord;
|
final String targetCodePonts = shortcutTarget.mWord;
|
||||||
assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
|
assertEquals((int)shortcutTargets.get(word).get(targetCodePonts),
|
||||||
shortcutTarget.mFrequency);
|
shortcutTarget.getProbability());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -127,7 +127,7 @@ public class CombinedInputOutput {
|
||||||
if (null != word) {
|
if (null != word) {
|
||||||
dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
|
dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
|
||||||
for (WeightedString s : bigrams) {
|
for (WeightedString s : bigrams) {
|
||||||
dict.setBigram(word, s.mWord, s.mFrequency);
|
dict.setBigram(word, s.mWord, s.getProbability());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (!shortcuts.isEmpty()) shortcuts = new ArrayList<WeightedString>();
|
if (!shortcuts.isEmpty()) shortcuts = new ArrayList<WeightedString>();
|
||||||
|
@ -185,7 +185,7 @@ public class CombinedInputOutput {
|
||||||
if (null != word) {
|
if (null != word) {
|
||||||
dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
|
dict.add(word, freq, shortcuts.isEmpty() ? null : shortcuts, isNotAWord);
|
||||||
for (WeightedString s : bigrams) {
|
for (WeightedString s : bigrams) {
|
||||||
dict.setBigram(word, s.mWord, s.mFrequency);
|
dict.setBigram(word, s.mWord, s.getProbability());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -222,13 +222,13 @@ public class CombinedInputOutput {
|
||||||
if (null != word.mShortcutTargets) {
|
if (null != word.mShortcutTargets) {
|
||||||
for (WeightedString target : word.mShortcutTargets) {
|
for (WeightedString target : word.mShortcutTargets) {
|
||||||
destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + ","
|
destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + ","
|
||||||
+ FREQUENCY_TAG + "=" + target.mFrequency + "\n");
|
+ FREQUENCY_TAG + "=" + target.getProbability() + "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (null != word.mBigrams) {
|
if (null != word.mBigrams) {
|
||||||
for (WeightedString bigram : word.mBigrams) {
|
for (WeightedString bigram : word.mBigrams) {
|
||||||
destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + ","
|
destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + ","
|
||||||
+ FREQUENCY_TAG + "=" + bigram.mFrequency + "\n");
|
+ FREQUENCY_TAG + "=" + bigram.getProbability() + "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -159,7 +159,7 @@ public class Diff extends Dicttool.Command {
|
||||||
if (null == list0) return false;
|
if (null == list0) return false;
|
||||||
for (final WeightedString attribute0 : list0) {
|
for (final WeightedString attribute0 : list0) {
|
||||||
System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
|
System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
|
||||||
+ attribute0.mFrequency);
|
+ attribute0.getProbability());
|
||||||
}
|
}
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -175,8 +175,8 @@ public class Diff extends Dicttool.Command {
|
||||||
for (final WeightedString attribute1 : list1) {
|
for (final WeightedString attribute1 : list1) {
|
||||||
if (attribute0.mWord.equals(attribute1.mWord)) {
|
if (attribute0.mWord.equals(attribute1.mWord)) {
|
||||||
System.out.println(type + " freq changed: " + word + " "
|
System.out.println(type + " freq changed: " + word + " "
|
||||||
+ attribute0.mWord + " " + attribute0.mFrequency + " -> "
|
+ attribute0.mWord + " " + attribute0.getProbability() + " -> "
|
||||||
+ attribute1.mFrequency);
|
+ attribute1.getProbability());
|
||||||
list1.remove(attribute1);
|
list1.remove(attribute1);
|
||||||
foundString = true;
|
foundString = true;
|
||||||
break;
|
break;
|
||||||
|
@ -185,7 +185,7 @@ public class Diff extends Dicttool.Command {
|
||||||
if (!foundString) {
|
if (!foundString) {
|
||||||
// We come here if we haven't found any matching string.
|
// We come here if we haven't found any matching string.
|
||||||
System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
|
System.out.println(type + " removed: " + word + " " + attribute0.mWord + " "
|
||||||
+ attribute0.mFrequency);
|
+ attribute0.getProbability());
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
list1.remove(attribute0);
|
list1.remove(attribute0);
|
||||||
|
@ -197,7 +197,7 @@ public class Diff extends Dicttool.Command {
|
||||||
for (final WeightedString attribute1 : list1) {
|
for (final WeightedString attribute1 : list1) {
|
||||||
hasDifferences = true;
|
hasDifferences = true;
|
||||||
System.out.println(type + " added: " + word + " " + attribute1.mWord + " "
|
System.out.println(type + " added: " + word + " " + attribute1.mWord + " "
|
||||||
+ attribute1.mFrequency);
|
+ attribute1.getProbability());
|
||||||
}
|
}
|
||||||
return hasDifferences;
|
return hasDifferences;
|
||||||
}
|
}
|
||||||
|
|
|
@ -51,7 +51,8 @@ public class Info extends Dicttool.Command {
|
||||||
if (null != w.mShortcutTargets) {
|
if (null != w.mShortcutTargets) {
|
||||||
shortcutCount += w.mShortcutTargets.size();
|
shortcutCount += w.mShortcutTargets.size();
|
||||||
for (WeightedString shortcutTarget : w.mShortcutTargets) {
|
for (WeightedString shortcutTarget : w.mShortcutTargets) {
|
||||||
if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.mFrequency) {
|
if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
|
||||||
|
== shortcutTarget.getProbability()) {
|
||||||
++whitelistCount;
|
++whitelistCount;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -84,8 +85,9 @@ public class Info extends Dicttool.Command {
|
||||||
} else {
|
} else {
|
||||||
for (final WeightedString shortcutTarget : shortcutTargets) {
|
for (final WeightedString shortcutTarget : shortcutTargets) {
|
||||||
System.out.println(" Shortcut target: " + shortcutTarget.mWord + " ("
|
System.out.println(" Shortcut target: " + shortcutTarget.mWord + " ("
|
||||||
+ (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY == shortcutTarget.mFrequency
|
+ (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
|
||||||
? "whitelist" : shortcutTarget.mFrequency) + ")");
|
== shortcutTarget.getProbability() ?
|
||||||
|
"whitelist" : shortcutTarget.getProbability()) + ")");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
final ArrayList<WeightedString> bigrams = ptNode.getBigrams();
|
final ArrayList<WeightedString> bigrams = ptNode.getBigrams();
|
||||||
|
@ -93,7 +95,8 @@ public class Info extends Dicttool.Command {
|
||||||
System.out.println(" No bigrams");
|
System.out.println(" No bigrams");
|
||||||
} else {
|
} else {
|
||||||
for (final WeightedString bigram : bigrams) {
|
for (final WeightedString bigram : bigrams) {
|
||||||
System.out.println(" Bigram: " + bigram.mWord + " (" + bigram.mFrequency + ")");
|
System.out.println(
|
||||||
|
" Bigram: " + bigram.mWord + " (" + bigram.getProbability() + ")");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -319,7 +319,7 @@ public class XmlDictInputOutput {
|
||||||
final ArrayList<WeightedString> bigramList = bigramMap.get(firstWord);
|
final ArrayList<WeightedString> bigramList = bigramMap.get(firstWord);
|
||||||
for (final WeightedString bigram : bigramList) {
|
for (final WeightedString bigram : bigramList) {
|
||||||
if (!dict.hasWord(bigram.mWord)) continue;
|
if (!dict.hasWord(bigram.mWord)) continue;
|
||||||
dict.setBigram(firstWord, bigram.mWord, bigram.mFrequency);
|
dict.setBigram(firstWord, bigram.mWord, bigram.getProbability());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return dict;
|
return dict;
|
||||||
|
@ -369,7 +369,7 @@ public class XmlDictInputOutput {
|
||||||
destination.write("\n");
|
destination.write("\n");
|
||||||
for (WeightedString target : word.mShortcutTargets) {
|
for (WeightedString target : word.mShortcutTargets) {
|
||||||
destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\""
|
destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\""
|
||||||
+ target.mFrequency + "\">" + target.mWord + "</" + SHORTCUT_TAG
|
+ target.getProbability() + "\">" + target.mWord + "</" + SHORTCUT_TAG
|
||||||
+ ">\n");
|
+ ">\n");
|
||||||
}
|
}
|
||||||
destination.write(" ");
|
destination.write(" ");
|
||||||
|
@ -378,7 +378,8 @@ public class XmlDictInputOutput {
|
||||||
destination.write("\n");
|
destination.write("\n");
|
||||||
for (WeightedString bigram : word.mBigrams) {
|
for (WeightedString bigram : word.mBigrams) {
|
||||||
destination.write(" <" + BIGRAM_TAG + " " + FREQUENCY_ATTR + "=\""
|
destination.write(" <" + BIGRAM_TAG + " " + FREQUENCY_ATTR + "=\""
|
||||||
+ bigram.mFrequency + "\">" + bigram.mWord + "</" + BIGRAM_TAG + ">\n");
|
+ bigram.getProbability() + "\">" + bigram.mWord
|
||||||
|
+ "</" + BIGRAM_TAG + ">\n");
|
||||||
}
|
}
|
||||||
destination.write(" ");
|
destination.write(" ");
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in New Issue