Merge "Consolidate WordProperty and Word."
commit
337dce8074
|
@ -26,12 +26,12 @@ import com.android.inputmethod.latin.makedict.DictionaryHeader;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||||
|
import com.android.inputmethod.latin.makedict.WordProperty;
|
||||||
import com.android.inputmethod.latin.settings.NativeSuggestOptions;
|
import com.android.inputmethod.latin.settings.NativeSuggestOptions;
|
||||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||||
import com.android.inputmethod.latin.utils.JniUtils;
|
import com.android.inputmethod.latin.utils.JniUtils;
|
||||||
import com.android.inputmethod.latin.utils.LanguageModelParam;
|
import com.android.inputmethod.latin.utils.LanguageModelParam;
|
||||||
import com.android.inputmethod.latin.utils.StringUtils;
|
import com.android.inputmethod.latin.utils.StringUtils;
|
||||||
import com.android.inputmethod.latin.utils.WordProperty;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
|
@ -23,13 +23,13 @@ import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
import com.android.inputmethod.keyboard.ProximityInfo;
|
import com.android.inputmethod.keyboard.ProximityInfo;
|
||||||
import com.android.inputmethod.latin.makedict.DictionaryHeader;
|
import com.android.inputmethod.latin.makedict.DictionaryHeader;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
|
import com.android.inputmethod.latin.makedict.WordProperty;
|
||||||
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
||||||
import com.android.inputmethod.latin.utils.AsyncResultHolder;
|
import com.android.inputmethod.latin.utils.AsyncResultHolder;
|
||||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||||
import com.android.inputmethod.latin.utils.FileUtils;
|
import com.android.inputmethod.latin.utils.FileUtils;
|
||||||
import com.android.inputmethod.latin.utils.LanguageModelParam;
|
import com.android.inputmethod.latin.utils.LanguageModelParam;
|
||||||
import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor;
|
import com.android.inputmethod.latin.utils.PrioritizedSerialExecutor;
|
||||||
import com.android.inputmethod.latin.utils.WordProperty;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
|
@ -606,19 +606,21 @@ public final class BinaryDictDecoderUtils {
|
||||||
|
|
||||||
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
|
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
|
||||||
if (null != dict) {
|
if (null != dict) {
|
||||||
for (final Word w : dict) {
|
for (final WordProperty wordProperty : dict) {
|
||||||
if (w.mIsBlacklistEntry) {
|
if (wordProperty.mIsBlacklistEntry) {
|
||||||
newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
|
newDict.addBlacklistEntry(wordProperty.mWord, wordProperty.mShortcutTargets,
|
||||||
|
wordProperty.mIsNotAWord);
|
||||||
} else {
|
} else {
|
||||||
newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
|
newDict.add(wordProperty.mWord, wordProperty.getProbability(),
|
||||||
|
wordProperty.mShortcutTargets, wordProperty.mIsNotAWord);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (final Word w : dict) {
|
for (final WordProperty wordProperty : dict) {
|
||||||
// By construction a binary dictionary may not have bigrams pointing to
|
// By construction a binary dictionary may not have bigrams pointing to
|
||||||
// words that are not also registered as unigrams so we don't have to avoid
|
// words that are not also registered as unigrams so we don't have to avoid
|
||||||
// them explicitly here.
|
// them explicitly here.
|
||||||
for (final WeightedString bigram : w.mBigrams) {
|
for (final WeightedString bigram : wordProperty.mBigrams) {
|
||||||
newDict.setBigram(w.mWord, bigram.mWord, bigram.getProbability());
|
newDict.setBigram(wordProperty.mWord, bigram.mWord, bigram.getProbability());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -31,7 +31,7 @@ import java.util.LinkedList;
|
||||||
* A dictionary that can fusion heads and tails of words for more compression.
|
* A dictionary that can fusion heads and tails of words for more compression.
|
||||||
*/
|
*/
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public final class FusionDictionary implements Iterable<Word> {
|
public final class FusionDictionary implements Iterable<WordProperty> {
|
||||||
private static final boolean DBG = MakedictLog.DBG;
|
private static final boolean DBG = MakedictLog.DBG;
|
||||||
|
|
||||||
private static int CHARACTER_NOT_FOUND_INDEX = -1;
|
private static int CHARACTER_NOT_FOUND_INDEX = -1;
|
||||||
|
@ -76,8 +76,12 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
public ProbabilityInfo mProbabilityInfo;
|
public ProbabilityInfo mProbabilityInfo;
|
||||||
|
|
||||||
public WeightedString(final String word, final int probability) {
|
public WeightedString(final String word, final int probability) {
|
||||||
|
this(word, new ProbabilityInfo(probability));
|
||||||
|
}
|
||||||
|
|
||||||
|
public WeightedString(final String word, final ProbabilityInfo probabilityInfo) {
|
||||||
mWord = word;
|
mWord = word;
|
||||||
mProbabilityInfo = new ProbabilityInfo(probability);
|
mProbabilityInfo = probabilityInfo;
|
||||||
}
|
}
|
||||||
|
|
||||||
public int getProbability() {
|
public int getProbability() {
|
||||||
|
@ -90,9 +94,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public int hashCode() {
|
public int hashCode() {
|
||||||
return Arrays.hashCode(new Object[] { mWord, mProbabilityInfo.mProbability,
|
return Arrays.hashCode(new Object[] { mWord, mProbabilityInfo});
|
||||||
mProbabilityInfo.mTimestamp, mProbabilityInfo.mLevel,
|
|
||||||
mProbabilityInfo.mCount });
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ -704,7 +706,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
*
|
*
|
||||||
* This is purely for convenience.
|
* This is purely for convenience.
|
||||||
*/
|
*/
|
||||||
public static final class DictionaryIterator implements Iterator<Word> {
|
public static final class DictionaryIterator implements Iterator<WordProperty> {
|
||||||
private static final class Position {
|
private static final class Position {
|
||||||
public Iterator<PtNode> pos;
|
public Iterator<PtNode> pos;
|
||||||
public int length;
|
public int length;
|
||||||
|
@ -734,7 +736,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Word next() {
|
public WordProperty next() {
|
||||||
Position currentPos = mPositions.getLast();
|
Position currentPos = mPositions.getLast();
|
||||||
mCurrentString.setLength(currentPos.length);
|
mCurrentString.setLength(currentPos.length);
|
||||||
|
|
||||||
|
@ -751,7 +753,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
mPositions.addLast(currentPos);
|
mPositions.addLast(currentPos);
|
||||||
}
|
}
|
||||||
if (currentPtNode.mFrequency >= 0) {
|
if (currentPtNode.mFrequency >= 0) {
|
||||||
return new Word(mCurrentString.toString(), currentPtNode.mFrequency,
|
return new WordProperty(mCurrentString.toString(), currentPtNode.mFrequency,
|
||||||
currentPtNode.mShortcutTargets, currentPtNode.mBigrams,
|
currentPtNode.mShortcutTargets, currentPtNode.mBigrams,
|
||||||
currentPtNode.mIsNotAWord, currentPtNode.mIsBlacklistEntry);
|
currentPtNode.mIsNotAWord, currentPtNode.mIsBlacklistEntry);
|
||||||
}
|
}
|
||||||
|
@ -777,7 +779,7 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
* and say : for (Word w : x) {}
|
* and say : for (WordProperty w : x) {}
|
||||||
*/
|
*/
|
||||||
@Override
|
@Override
|
||||||
public Iterator<Word> iterator() {
|
public Iterator<WordProperty> iterator() {
|
||||||
return new DictionaryIterator(mRootNodeArray.mData);
|
return new DictionaryIterator(mRootNodeArray.mData);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,6 +19,8 @@ package com.android.inputmethod.latin.makedict;
|
||||||
import com.android.inputmethod.latin.BinaryDictionary;
|
import com.android.inputmethod.latin.BinaryDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
public final class ProbabilityInfo {
|
public final class ProbabilityInfo {
|
||||||
public final int mProbability;
|
public final int mProbability;
|
||||||
// mTimestamp, mLevel and mCount are historical info. These values depend on the
|
// mTimestamp, mLevel and mCount are historical info. These values depend on the
|
||||||
|
@ -44,9 +46,19 @@ public final class ProbabilityInfo {
|
||||||
return mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP;
|
return mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
if (hasHistoricalInfo()) {
|
||||||
|
return Arrays.hashCode(new Object[] { mProbability, mTimestamp, mLevel, mCount });
|
||||||
|
} else {
|
||||||
|
return Arrays.hashCode(new Object[] { mProbability });
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String toString() {
|
public String toString() {
|
||||||
return mTimestamp + ":" + mLevel + ":" + mCount;
|
return "f=" + mProbability + (hasHistoricalInfo() ?
|
||||||
|
",historicalInfo=" + mTimestamp + ":" + mLevel + ":" + mCount : "");
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
|
@ -71,27 +71,29 @@ public class Ver4DictEncoder implements DictEncoder {
|
||||||
// Somehow createEmptyDictFile returned true, but the file was not created correctly
|
// Somehow createEmptyDictFile returned true, but the file was not created correctly
|
||||||
throw new IOException("Cannot create dictionary file");
|
throw new IOException("Cannot create dictionary file");
|
||||||
}
|
}
|
||||||
for (final Word word : dict) {
|
for (final WordProperty wordProperty : dict) {
|
||||||
// TODO: switch to addMultipleDictionaryEntries when they support shortcuts
|
// TODO: switch to addMultipleDictionaryEntries when they support shortcuts
|
||||||
if (null == word.mShortcutTargets || word.mShortcutTargets.isEmpty()) {
|
if (null == wordProperty.mShortcutTargets || wordProperty.mShortcutTargets.isEmpty()) {
|
||||||
binaryDict.addUnigramWord(word.mWord, word.mFrequency,
|
binaryDict.addUnigramWord(wordProperty.mWord, wordProperty.getProbability(),
|
||||||
null /* shortcutTarget */, 0 /* shortcutProbability */,
|
null /* shortcutTarget */, 0 /* shortcutProbability */,
|
||||||
word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */);
|
wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry,
|
||||||
|
0 /* timestamp */);
|
||||||
} else {
|
} else {
|
||||||
for (final WeightedString shortcutTarget : word.mShortcutTargets) {
|
for (final WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
|
||||||
binaryDict.addUnigramWord(word.mWord, word.mFrequency,
|
binaryDict.addUnigramWord(wordProperty.mWord, wordProperty.getProbability(),
|
||||||
shortcutTarget.mWord, shortcutTarget.getProbability(),
|
shortcutTarget.mWord, shortcutTarget.getProbability(),
|
||||||
word.mIsNotAWord, word.mIsBlacklistEntry, 0 /* timestamp */);
|
wordProperty.mIsNotAWord, wordProperty.mIsBlacklistEntry,
|
||||||
|
0 /* timestamp */);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
|
if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
binaryDict.flushWithGC();
|
binaryDict.flushWithGC();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (final Word word0 : dict) {
|
for (final WordProperty word0Property : dict) {
|
||||||
if (null == word0.mBigrams) continue;
|
if (null == word0Property.mBigrams) continue;
|
||||||
for (final WeightedString word1 : word0.mBigrams) {
|
for (final WeightedString word1 : word0Property.mBigrams) {
|
||||||
binaryDict.addBigramWords(word0.mWord, word1.mWord, word1.getProbability(),
|
binaryDict.addBigramWords(word0Property.mWord, word1.mWord, word1.getProbability(),
|
||||||
0 /* timestamp */);
|
0 /* timestamp */);
|
||||||
if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
|
if (binaryDict.needsToRunGC(true /* mindsBlockByGC */)) {
|
||||||
binaryDict.flushWithGC();
|
binaryDict.flushWithGC();
|
||||||
|
|
|
@ -1,100 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2011 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.android.inputmethod.latin.makedict;
|
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Utility class for a word with a frequency.
|
|
||||||
*
|
|
||||||
* This is chiefly used to iterate a dictionary.
|
|
||||||
*/
|
|
||||||
public final class Word implements Comparable<Word> {
|
|
||||||
public final String mWord;
|
|
||||||
public final int mFrequency;
|
|
||||||
public final ArrayList<WeightedString> mShortcutTargets;
|
|
||||||
public final ArrayList<WeightedString> mBigrams;
|
|
||||||
public final boolean mIsNotAWord;
|
|
||||||
public final boolean mIsBlacklistEntry;
|
|
||||||
|
|
||||||
private int mHashCode = 0;
|
|
||||||
|
|
||||||
public Word(final String word, final int frequency,
|
|
||||||
final ArrayList<WeightedString> shortcutTargets,
|
|
||||||
final ArrayList<WeightedString> bigrams,
|
|
||||||
final boolean isNotAWord, final boolean isBlacklistEntry) {
|
|
||||||
mWord = word;
|
|
||||||
mFrequency = frequency;
|
|
||||||
mShortcutTargets = shortcutTargets;
|
|
||||||
mBigrams = bigrams;
|
|
||||||
mIsNotAWord = isNotAWord;
|
|
||||||
mIsBlacklistEntry = isBlacklistEntry;
|
|
||||||
}
|
|
||||||
|
|
||||||
private static int computeHashCode(Word word) {
|
|
||||||
return Arrays.hashCode(new Object[] {
|
|
||||||
word.mWord,
|
|
||||||
word.mFrequency,
|
|
||||||
word.mShortcutTargets.hashCode(),
|
|
||||||
word.mBigrams.hashCode(),
|
|
||||||
word.mIsNotAWord,
|
|
||||||
word.mIsBlacklistEntry
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Three-way comparison.
|
|
||||||
*
|
|
||||||
* A Word x is greater than a word y if x has a higher frequency. If they have the same
|
|
||||||
* frequency, they are sorted in lexicographic order.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public int compareTo(Word w) {
|
|
||||||
if (mFrequency < w.mFrequency) return 1;
|
|
||||||
if (mFrequency > w.mFrequency) return -1;
|
|
||||||
return mWord.compareTo(w.mWord);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Equality test.
|
|
||||||
*
|
|
||||||
* Words are equal if they have the same frequency, the same spellings, and the same
|
|
||||||
* attributes.
|
|
||||||
*/
|
|
||||||
@Override
|
|
||||||
public boolean equals(Object o) {
|
|
||||||
if (o == this) return true;
|
|
||||||
if (!(o instanceof Word)) return false;
|
|
||||||
Word w = (Word)o;
|
|
||||||
return mFrequency == w.mFrequency && mWord.equals(w.mWord)
|
|
||||||
&& mShortcutTargets.equals(w.mShortcutTargets)
|
|
||||||
&& mBigrams.equals(w.mBigrams)
|
|
||||||
&& mIsNotAWord == w.mIsNotAWord
|
|
||||||
&& mIsBlacklistEntry == w.mIsBlacklistEntry;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int hashCode() {
|
|
||||||
if (mHashCode == 0) {
|
|
||||||
mHashCode = computeHashCode(this);
|
|
||||||
}
|
|
||||||
return mHashCode;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -0,0 +1,189 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2011 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
|
import com.android.inputmethod.latin.BinaryDictionary;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||||
|
import com.android.inputmethod.latin.utils.StringUtils;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility class for a word with a probability.
|
||||||
|
*
|
||||||
|
* This is chiefly used to iterate a dictionary.
|
||||||
|
*/
|
||||||
|
public final class WordProperty implements Comparable<WordProperty> {
|
||||||
|
public final String mWord;
|
||||||
|
public final ProbabilityInfo mProbabilityInfo;
|
||||||
|
public final ArrayList<WeightedString> mShortcutTargets;
|
||||||
|
public final ArrayList<WeightedString> mBigrams;
|
||||||
|
public final boolean mIsNotAWord;
|
||||||
|
public final boolean mIsBlacklistEntry;
|
||||||
|
public final boolean mHasShortcuts;
|
||||||
|
public final boolean mHasBigrams;
|
||||||
|
|
||||||
|
private int mHashCode = 0;
|
||||||
|
|
||||||
|
public WordProperty(final String word, final int probability,
|
||||||
|
final ArrayList<WeightedString> shortcutTargets,
|
||||||
|
final ArrayList<WeightedString> bigrams,
|
||||||
|
final boolean isNotAWord, final boolean isBlacklistEntry) {
|
||||||
|
mWord = word;
|
||||||
|
mProbabilityInfo = new ProbabilityInfo(probability);
|
||||||
|
mShortcutTargets = shortcutTargets;
|
||||||
|
mBigrams = bigrams;
|
||||||
|
mIsNotAWord = isNotAWord;
|
||||||
|
mIsBlacklistEntry = isBlacklistEntry;
|
||||||
|
mHasBigrams = !bigrams.isEmpty();
|
||||||
|
mHasShortcuts = !shortcutTargets.isEmpty();
|
||||||
|
}
|
||||||
|
|
||||||
|
private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
|
||||||
|
return new ProbabilityInfo(
|
||||||
|
probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
|
||||||
|
probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
|
||||||
|
probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
|
||||||
|
probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Construct word property using information from native code.
|
||||||
|
// This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
|
||||||
|
public WordProperty(final int[] codePoints, final boolean isNotAWord,
|
||||||
|
final boolean isBlacklisted, final boolean hasBigram,
|
||||||
|
final boolean hasShortcuts, final int[] probabilityInfo,
|
||||||
|
final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo,
|
||||||
|
final ArrayList<int[]> shortcutTargets,
|
||||||
|
final ArrayList<Integer> shortcutProbabilities) {
|
||||||
|
mWord = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
|
||||||
|
mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
|
||||||
|
mShortcutTargets = CollectionUtils.newArrayList();
|
||||||
|
mBigrams = CollectionUtils.newArrayList();
|
||||||
|
mIsNotAWord = isNotAWord;
|
||||||
|
mIsBlacklistEntry = isBlacklisted;
|
||||||
|
mHasShortcuts = hasShortcuts;
|
||||||
|
mHasBigrams = hasBigram;
|
||||||
|
|
||||||
|
final int bigramTargetCount = bigramTargets.size();
|
||||||
|
for (int i = 0; i < bigramTargetCount; i++) {
|
||||||
|
final String bigramTargetString =
|
||||||
|
StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
|
||||||
|
mBigrams.add(new WeightedString(bigramTargetString,
|
||||||
|
createProbabilityInfoFromArray(bigramProbabilityInfo.get(i))));
|
||||||
|
}
|
||||||
|
|
||||||
|
final int shortcutTargetCount = shortcutTargets.size();
|
||||||
|
for (int i = 0; i < shortcutTargetCount; i++) {
|
||||||
|
final String shortcutTargetString =
|
||||||
|
StringUtils.getStringFromNullTerminatedCodePointArray(shortcutTargets.get(i));
|
||||||
|
mShortcutTargets.add(
|
||||||
|
new WeightedString(shortcutTargetString, shortcutProbabilities.get(i)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public int getProbability() {
|
||||||
|
return mProbabilityInfo.mProbability;
|
||||||
|
}
|
||||||
|
|
||||||
|
private static int computeHashCode(WordProperty word) {
|
||||||
|
return Arrays.hashCode(new Object[] {
|
||||||
|
word.mWord,
|
||||||
|
word.mProbabilityInfo,
|
||||||
|
word.mShortcutTargets.hashCode(),
|
||||||
|
word.mBigrams.hashCode(),
|
||||||
|
word.mIsNotAWord,
|
||||||
|
word.mIsBlacklistEntry
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Three-way comparison.
|
||||||
|
*
|
||||||
|
* A Word x is greater than a word y if x has a higher frequency. If they have the same
|
||||||
|
* frequency, they are sorted in lexicographic order.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public int compareTo(final WordProperty w) {
|
||||||
|
if (getProbability() < w.getProbability()) return 1;
|
||||||
|
if (getProbability() > w.getProbability()) return -1;
|
||||||
|
return mWord.compareTo(w.mWord);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Equality test.
|
||||||
|
*
|
||||||
|
* Words are equal if they have the same frequency, the same spellings, and the same
|
||||||
|
* attributes.
|
||||||
|
*/
|
||||||
|
@Override
|
||||||
|
public boolean equals(Object o) {
|
||||||
|
if (o == this) return true;
|
||||||
|
if (!(o instanceof WordProperty)) return false;
|
||||||
|
WordProperty w = (WordProperty)o;
|
||||||
|
return mProbabilityInfo.equals(w.mProbabilityInfo) && mWord.equals(w.mWord)
|
||||||
|
&& mShortcutTargets.equals(w.mShortcutTargets) && mBigrams.equals(w.mBigrams)
|
||||||
|
&& mIsNotAWord == w.mIsNotAWord && mIsBlacklistEntry == w.mIsBlacklistEntry
|
||||||
|
&& mHasBigrams == w.mHasBigrams && mHasShortcuts && w.mHasBigrams;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int hashCode() {
|
||||||
|
if (mHashCode == 0) {
|
||||||
|
mHashCode = computeHashCode(this);
|
||||||
|
}
|
||||||
|
return mHashCode;
|
||||||
|
}
|
||||||
|
|
||||||
|
@UsedForTesting
|
||||||
|
public boolean isValid() {
|
||||||
|
return getProbability() != BinaryDictionary.NOT_A_PROBABILITY;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String toString() {
|
||||||
|
// TODO: Move this logic to CombinedInputOutput.
|
||||||
|
final StringBuffer builder = new StringBuffer();
|
||||||
|
builder.append(" word=" + mWord);
|
||||||
|
builder.append(",");
|
||||||
|
builder.append(mProbabilityInfo.toString());
|
||||||
|
if (mIsNotAWord) {
|
||||||
|
builder.append(",");
|
||||||
|
builder.append("not_a_word=true");
|
||||||
|
}
|
||||||
|
if (mIsBlacklistEntry) {
|
||||||
|
builder.append(",");
|
||||||
|
builder.append("blacklisted=true");
|
||||||
|
}
|
||||||
|
builder.append("\n");
|
||||||
|
for (int i = 0; i < mBigrams.size(); i++) {
|
||||||
|
builder.append(" bigram=" + mBigrams.get(i).mWord);
|
||||||
|
builder.append(",");
|
||||||
|
builder.append(mBigrams.get(i).mProbabilityInfo.toString());
|
||||||
|
builder.append("\n");
|
||||||
|
}
|
||||||
|
for (int i = 0; i < mShortcutTargets.size(); i++) {
|
||||||
|
builder.append(" shortcut=" + mShortcutTargets.get(i).mWord);
|
||||||
|
builder.append(",");
|
||||||
|
builder.append(mShortcutTargets.get(i).mProbabilityInfo.toString());
|
||||||
|
builder.append("\n");
|
||||||
|
}
|
||||||
|
return builder.toString();
|
||||||
|
}
|
||||||
|
}
|
|
@ -1,127 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
package com.android.inputmethod.latin.utils;
|
|
||||||
|
|
||||||
import com.android.inputmethod.annotations.UsedForTesting;
|
|
||||||
import com.android.inputmethod.latin.BinaryDictionary;
|
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
|
||||||
import com.android.inputmethod.latin.makedict.ProbabilityInfo;
|
|
||||||
|
|
||||||
import java.util.ArrayList;
|
|
||||||
|
|
||||||
// This has information that belong to a unigram. This class has some detailed attributes such as
|
|
||||||
// historical information but they have to be checked only for testing purpose.
|
|
||||||
@UsedForTesting
|
|
||||||
public class WordProperty {
|
|
||||||
public final String mCodePoints;
|
|
||||||
public final boolean mIsNotAWord;
|
|
||||||
public final boolean mIsBlacklisted;
|
|
||||||
public final boolean mHasBigrams;
|
|
||||||
public final boolean mHasShortcuts;
|
|
||||||
public final ProbabilityInfo mProbabilityInfo;
|
|
||||||
public final ArrayList<WeightedString> mBigramTargets = CollectionUtils.newArrayList();
|
|
||||||
public final ArrayList<ProbabilityInfo> mBigramProbabilityInfo = CollectionUtils.newArrayList();
|
|
||||||
public final ArrayList<WeightedString> mShortcutTargets = CollectionUtils.newArrayList();
|
|
||||||
|
|
||||||
private static ProbabilityInfo createProbabilityInfoFromArray(final int[] probabilityInfo) {
|
|
||||||
return new ProbabilityInfo(
|
|
||||||
probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_PROBABILITY_INDEX],
|
|
||||||
probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_TIMESTAMP_INDEX],
|
|
||||||
probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_LEVEL_INDEX],
|
|
||||||
probabilityInfo[BinaryDictionary.FORMAT_WORD_PROPERTY_COUNT_INDEX]);
|
|
||||||
}
|
|
||||||
|
|
||||||
// This represents invalid word when the probability is BinaryDictionary.NOT_A_PROBABILITY.
|
|
||||||
public WordProperty(final int[] codePoints, final boolean isNotAWord,
|
|
||||||
final boolean isBlacklisted, final boolean hasBigram,
|
|
||||||
final boolean hasShortcuts, final int[] probabilityInfo,
|
|
||||||
final ArrayList<int[]> bigramTargets, final ArrayList<int[]> bigramProbabilityInfo,
|
|
||||||
final ArrayList<int[]> shortcutTargets,
|
|
||||||
final ArrayList<Integer> shortcutProbabilities) {
|
|
||||||
mCodePoints = StringUtils.getStringFromNullTerminatedCodePointArray(codePoints);
|
|
||||||
mIsNotAWord = isNotAWord;
|
|
||||||
mIsBlacklisted = isBlacklisted;
|
|
||||||
mHasBigrams = hasBigram;
|
|
||||||
mHasShortcuts = hasShortcuts;
|
|
||||||
mProbabilityInfo = createProbabilityInfoFromArray(probabilityInfo);
|
|
||||||
|
|
||||||
final int bigramTargetCount = bigramTargets.size();
|
|
||||||
for (int i = 0; i < bigramTargetCount; i++) {
|
|
||||||
final String bigramTargetString =
|
|
||||||
StringUtils.getStringFromNullTerminatedCodePointArray(bigramTargets.get(i));
|
|
||||||
final ProbabilityInfo bigramProbability =
|
|
||||||
createProbabilityInfoFromArray(bigramProbabilityInfo.get(i));
|
|
||||||
mBigramTargets.add(
|
|
||||||
new WeightedString(bigramTargetString, bigramProbability.mProbability));
|
|
||||||
mBigramProbabilityInfo.add(bigramProbability);
|
|
||||||
}
|
|
||||||
|
|
||||||
final int shortcutTargetCount = shortcutTargets.size();
|
|
||||||
for (int i = 0; i < shortcutTargetCount; i++) {
|
|
||||||
final String shortcutTargetString =
|
|
||||||
StringUtils.getStringFromNullTerminatedCodePointArray(shortcutTargets.get(i));
|
|
||||||
mShortcutTargets.add(
|
|
||||||
new WeightedString(shortcutTargetString, shortcutProbabilities.get(i)));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
@UsedForTesting
|
|
||||||
public boolean isValid() {
|
|
||||||
return mProbabilityInfo.mProbability != BinaryDictionary.NOT_A_PROBABILITY;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public String toString() {
|
|
||||||
// TODO: Move this logic to CombinedInputOutput.
|
|
||||||
final StringBuffer builder = new StringBuffer();
|
|
||||||
builder.append(" word=" + mCodePoints);
|
|
||||||
builder.append(",");
|
|
||||||
builder.append("f=" + mProbabilityInfo.mProbability);
|
|
||||||
if (mIsNotAWord) {
|
|
||||||
builder.append(",");
|
|
||||||
builder.append("not_a_word=true");
|
|
||||||
}
|
|
||||||
if (mIsBlacklisted) {
|
|
||||||
builder.append(",");
|
|
||||||
builder.append("blacklisted=true");
|
|
||||||
}
|
|
||||||
if (mProbabilityInfo.mTimestamp != BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
|
|
||||||
builder.append(",");
|
|
||||||
builder.append("historicalInfo=" + mProbabilityInfo);
|
|
||||||
}
|
|
||||||
builder.append("\n");
|
|
||||||
for (int i = 0; i < mBigramTargets.size(); i++) {
|
|
||||||
builder.append(" bigram=" + mBigramTargets.get(i).mWord);
|
|
||||||
builder.append(",");
|
|
||||||
builder.append("f=" + mBigramTargets.get(i).getProbability());
|
|
||||||
if (mBigramProbabilityInfo.get(i).mTimestamp
|
|
||||||
!= BinaryDictionary.NOT_A_VALID_TIMESTAMP) {
|
|
||||||
builder.append(",");
|
|
||||||
builder.append("historicalInfo=" + mBigramProbabilityInfo.get(i));
|
|
||||||
}
|
|
||||||
builder.append("\n");
|
|
||||||
}
|
|
||||||
for (int i = 0; i < mShortcutTargets.size(); i++) {
|
|
||||||
builder.append(" shortcut=" + mShortcutTargets.get(i).mWord);
|
|
||||||
builder.append(",");
|
|
||||||
builder.append("f=" + mShortcutTargets.get(i).getProbability());
|
|
||||||
builder.append("\n");
|
|
||||||
}
|
|
||||||
return builder.toString();
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -24,9 +24,9 @@ import android.util.Pair;
|
||||||
import com.android.inputmethod.latin.makedict.CodePointUtils;
|
import com.android.inputmethod.latin.makedict.CodePointUtils;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
import com.android.inputmethod.latin.makedict.WordProperty;
|
||||||
import com.android.inputmethod.latin.utils.FileUtils;
|
import com.android.inputmethod.latin.utils.FileUtils;
|
||||||
import com.android.inputmethod.latin.utils.LanguageModelParam;
|
import com.android.inputmethod.latin.utils.LanguageModelParam;
|
||||||
import com.android.inputmethod.latin.utils.WordProperty;
|
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
@ -916,15 +916,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
words.add(word);
|
words.add(word);
|
||||||
wordProbabilities.put(word, unigramProbability);
|
wordProbabilities.put(word, unigramProbability);
|
||||||
final WordProperty unigramProperty = binaryDictionary.getWordProperty(word);
|
final WordProperty wordProperty = binaryDictionary.getWordProperty(word);
|
||||||
assertEquals(word, unigramProperty.mCodePoints);
|
assertEquals(word, wordProperty.mWord);
|
||||||
assertTrue(unigramProperty.isValid());
|
assertTrue(wordProperty.isValid());
|
||||||
assertEquals(isNotAWord, unigramProperty.mIsNotAWord);
|
assertEquals(isNotAWord, wordProperty.mIsNotAWord);
|
||||||
assertEquals(isBlacklisted, unigramProperty.mIsBlacklisted);
|
assertEquals(isBlacklisted, wordProperty.mIsBlacklistEntry);
|
||||||
assertEquals(false, unigramProperty.mHasBigrams);
|
assertEquals(false, wordProperty.mHasBigrams);
|
||||||
assertEquals(false, unigramProperty.mHasShortcuts);
|
assertEquals(false, wordProperty.mHasShortcuts);
|
||||||
assertEquals(unigramProbability, unigramProperty.mProbabilityInfo.mProbability);
|
assertEquals(unigramProbability, wordProperty.mProbabilityInfo.mProbability);
|
||||||
assertTrue(unigramProperty.mShortcutTargets.isEmpty());
|
assertTrue(wordProperty.mShortcutTargets.isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
for (int i = 0; i < BIGRAM_COUNT; i++) {
|
for (int i = 0; i < BIGRAM_COUNT; i++) {
|
||||||
|
@ -955,18 +955,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
final HashSet<String> bigramWord1s = bigrams.get(word0);
|
final HashSet<String> bigramWord1s = bigrams.get(word0);
|
||||||
final WordProperty unigramProperty = binaryDictionary.getWordProperty(word0);
|
final WordProperty wordProperty = binaryDictionary.getWordProperty(word0);
|
||||||
assertEquals(bigramWord1s.size(), unigramProperty.mBigramTargets.size());
|
assertEquals(bigramWord1s.size(), wordProperty.mBigrams.size());
|
||||||
assertEquals(unigramProperty.mBigramTargets.size(),
|
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
|
||||||
unigramProperty.mBigramProbabilityInfo.size());
|
final String word1 = wordProperty.mBigrams.get(j).mWord;
|
||||||
for (int j = 0; j < unigramProperty.mBigramTargets.size(); j++) {
|
|
||||||
final String word1 = unigramProperty.mBigramTargets.get(j).mWord;
|
|
||||||
assertTrue(bigramWord1s.contains(word1));
|
assertTrue(bigramWord1s.contains(word1));
|
||||||
final int probability = unigramProperty.mBigramTargets.get(j).getProbability();
|
final int probability = wordProperty.mBigrams.get(j).getProbability();
|
||||||
assertEquals((int)bigramProbabilities.get(new Pair<String, String>(word0, word1)),
|
assertEquals((int)bigramProbabilities.get(new Pair<String, String>(word0, word1)),
|
||||||
probability);
|
probability);
|
||||||
assertEquals(unigramProperty.mBigramProbabilityInfo.get(j).mProbability,
|
assertEquals(wordProperty.mBigrams.get(j).getProbability(), probability);
|
||||||
probability);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1045,15 +1042,15 @@ public class BinaryDictionaryTests extends AndroidTestCase {
|
||||||
final BinaryDictionary.GetNextWordPropertyResult result =
|
final BinaryDictionary.GetNextWordPropertyResult result =
|
||||||
binaryDictionary.getNextWordProperty(token);
|
binaryDictionary.getNextWordProperty(token);
|
||||||
final WordProperty wordProperty = result.mWordProperty;
|
final WordProperty wordProperty = result.mWordProperty;
|
||||||
final String word0 = wordProperty.mCodePoints;
|
final String word0 = wordProperty.mWord;
|
||||||
assertEquals((int)wordProbabilitiesToCheckLater.get(word0),
|
assertEquals((int)wordProbabilitiesToCheckLater.get(word0),
|
||||||
wordProperty.mProbabilityInfo.mProbability);
|
wordProperty.mProbabilityInfo.mProbability);
|
||||||
wordSet.remove(word0);
|
wordSet.remove(word0);
|
||||||
final HashSet<String> bigramWord1s = bigrams.get(word0);
|
final HashSet<String> bigramWord1s = bigrams.get(word0);
|
||||||
for (int j = 0; j < wordProperty.mBigramTargets.size(); j++) {
|
for (int j = 0; j < wordProperty.mBigrams.size(); j++) {
|
||||||
final String word1 = wordProperty.mBigramTargets.get(j).mWord;
|
final String word1 = wordProperty.mBigrams.get(j).mWord;
|
||||||
assertTrue(bigramWord1s.contains(word1));
|
assertTrue(bigramWord1s.contains(word1));
|
||||||
final int probability = wordProperty.mBigramTargets.get(j).getProbability();
|
final int probability = wordProperty.mBigrams.get(j).getProbability();
|
||||||
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
final Pair<String, String> bigram = new Pair<String, String>(word0, word1);
|
||||||
assertEquals((int)bigramProbabilitiesToCheckLater.get(bigram), probability);
|
assertEquals((int)bigramProbabilitiesToCheckLater.get(bigram), probability);
|
||||||
bigramSet.remove(bigram);
|
bigramSet.remove(bigram);
|
||||||
|
|
|
@ -48,8 +48,7 @@ USED_TARGETTED_UTILS := \
|
||||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java \
|
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java \
|
||||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/LocaleUtils.java \
|
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/LocaleUtils.java \
|
||||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ResizableIntArray.java \
|
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ResizableIntArray.java \
|
||||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/StringUtils.java \
|
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/StringUtils.java
|
||||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/WordProperty.java
|
|
||||||
|
|
||||||
DICTTOOL_ONDEVICE_TESTS_DIRECTORY := \
|
DICTTOOL_ONDEVICE_TESTS_DIRECTORY := \
|
||||||
$(LATINIME_LOCAL_DIR)/tests/src/com/android/inputmethod/latin/makedict/
|
$(LATINIME_LOCAL_DIR)/tests/src/com/android/inputmethod/latin/makedict/
|
||||||
|
|
|
@ -21,7 +21,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
import com.android.inputmethod.latin.makedict.Word;
|
import com.android.inputmethod.latin.makedict.WordProperty;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
@ -45,7 +45,7 @@ public class CombinedInputOutput {
|
||||||
private static final String DICTIONARY_TAG = "dictionary";
|
private static final String DICTIONARY_TAG = "dictionary";
|
||||||
private static final String BIGRAM_TAG = "bigram";
|
private static final String BIGRAM_TAG = "bigram";
|
||||||
private static final String SHORTCUT_TAG = "shortcut";
|
private static final String SHORTCUT_TAG = "shortcut";
|
||||||
private static final String FREQUENCY_TAG = "f";
|
private static final String PROBABILITY_TAG = "f";
|
||||||
private static final String WORD_TAG = "word";
|
private static final String WORD_TAG = "word";
|
||||||
private static final String NOT_A_WORD_TAG = "not_a_word";
|
private static final String NOT_A_WORD_TAG = "not_a_word";
|
||||||
private static final String WHITELIST_TAG = "whitelist";
|
private static final String WHITELIST_TAG = "whitelist";
|
||||||
|
@ -138,7 +138,7 @@ public class CombinedInputOutput {
|
||||||
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
|
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
|
||||||
if (WORD_TAG.equals(params[0])) {
|
if (WORD_TAG.equals(params[0])) {
|
||||||
word = params[1];
|
word = params[1];
|
||||||
} else if (FREQUENCY_TAG.equals(params[0])) {
|
} else if (PROBABILITY_TAG.equals(params[0])) {
|
||||||
freq = Integer.parseInt(params[1]);
|
freq = Integer.parseInt(params[1]);
|
||||||
} else if (NOT_A_WORD_TAG.equals(params[0])) {
|
} else if (NOT_A_WORD_TAG.equals(params[0])) {
|
||||||
isNotAWord = "true".equals(params[1]);
|
isNotAWord = "true".equals(params[1]);
|
||||||
|
@ -152,7 +152,7 @@ public class CombinedInputOutput {
|
||||||
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
|
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
|
||||||
if (SHORTCUT_TAG.equals(params[0])) {
|
if (SHORTCUT_TAG.equals(params[0])) {
|
||||||
shortcut = params[1];
|
shortcut = params[1];
|
||||||
} else if (FREQUENCY_TAG.equals(params[0])) {
|
} else if (PROBABILITY_TAG.equals(params[0])) {
|
||||||
shortcutFreq = WHITELIST_TAG.equals(params[1])
|
shortcutFreq = WHITELIST_TAG.equals(params[1])
|
||||||
? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
|
? FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
|
||||||
: Integer.parseInt(params[1]);
|
: Integer.parseInt(params[1]);
|
||||||
|
@ -171,7 +171,7 @@ public class CombinedInputOutput {
|
||||||
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
|
if (2 != params.length) throw new RuntimeException("Wrong format : " + line);
|
||||||
if (BIGRAM_TAG.equals(params[0])) {
|
if (BIGRAM_TAG.equals(params[0])) {
|
||||||
secondWordOfBigram = params[1];
|
secondWordOfBigram = params[1];
|
||||||
} else if (FREQUENCY_TAG.equals(params[0])) {
|
} else if (PROBABILITY_TAG.equals(params[0])) {
|
||||||
bigramFreq = Integer.parseInt(params[1]);
|
bigramFreq = Integer.parseInt(params[1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -200,9 +200,10 @@ public class CombinedInputOutput {
|
||||||
*/
|
*/
|
||||||
public static void writeDictionaryCombined(Writer destination, FusionDictionary dict)
|
public static void writeDictionaryCombined(Writer destination, FusionDictionary dict)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final TreeSet<Word> set = new TreeSet<Word>();
|
final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
|
||||||
for (Word word : dict) {
|
for (WordProperty wordProperty: dict) {
|
||||||
set.add(word); // This for ordering by frequency, then by asciibetic order
|
// This for ordering by frequency, then by asciibetic order
|
||||||
|
wordPropertiesInDict.add(wordProperty);
|
||||||
}
|
}
|
||||||
final HashMap<String, String> options = dict.mOptions.mAttributes;
|
final HashMap<String, String> options = dict.mOptions.mAttributes;
|
||||||
destination.write(DICTIONARY_TAG + "=");
|
destination.write(DICTIONARY_TAG + "=");
|
||||||
|
@ -215,20 +216,20 @@ public class CombinedInputOutput {
|
||||||
destination.write("," + key + "=" + value);
|
destination.write("," + key + "=" + value);
|
||||||
}
|
}
|
||||||
destination.write("\n");
|
destination.write("\n");
|
||||||
for (Word word : set) {
|
for (WordProperty wordProperty : wordPropertiesInDict) {
|
||||||
destination.write(" " + WORD_TAG + "=" + word.mWord + ","
|
destination.write(" " + WORD_TAG + "=" + wordProperty.mWord + ","
|
||||||
+ FREQUENCY_TAG + "=" + word.mFrequency
|
+ PROBABILITY_TAG + "=" + wordProperty.getProbability()
|
||||||
+ (word.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n"));
|
+ (wordProperty.mIsNotAWord ? "," + NOT_A_WORD_TAG + "=true\n" : "\n"));
|
||||||
if (null != word.mShortcutTargets) {
|
if (null != wordProperty.mShortcutTargets) {
|
||||||
for (WeightedString target : word.mShortcutTargets) {
|
for (WeightedString target : wordProperty.mShortcutTargets) {
|
||||||
destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + ","
|
destination.write(" " + SHORTCUT_TAG + "=" + target.mWord + ","
|
||||||
+ FREQUENCY_TAG + "=" + target.getProbability() + "\n");
|
+ PROBABILITY_TAG + "=" + target.getProbability() + "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (null != word.mBigrams) {
|
if (null != wordProperty.mBigrams) {
|
||||||
for (WeightedString bigram : word.mBigrams) {
|
for (WeightedString bigram : wordProperty.mBigrams) {
|
||||||
destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + ","
|
destination.write(" " + BIGRAM_TAG + "=" + bigram.mWord + ","
|
||||||
+ FREQUENCY_TAG + "=" + bigram.getProbability() + "\n");
|
+ PROBABILITY_TAG + "=" + bigram.getProbability() + "\n");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -19,7 +19,7 @@ package com.android.inputmethod.latin.dicttool;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
import com.android.inputmethod.latin.makedict.Word;
|
import com.android.inputmethod.latin.makedict.WordProperty;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -108,42 +108,46 @@ public class Diff extends Dicttool.Command {
|
||||||
|
|
||||||
private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
|
private static void diffWords(final FusionDictionary dict0, final FusionDictionary dict1) {
|
||||||
boolean hasDifferences = false;
|
boolean hasDifferences = false;
|
||||||
for (final Word word0 : dict0) {
|
for (final WordProperty word0Property : dict0) {
|
||||||
final PtNode word1 = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
|
final PtNode word1PtNode = FusionDictionary.findWordInTree(dict1.mRootNodeArray,
|
||||||
word0.mWord);
|
word0Property.mWord);
|
||||||
if (null == word1) {
|
if (null == word1PtNode) {
|
||||||
// This word is not in dict1
|
// This word is not in dict1
|
||||||
System.out.println("Deleted: " + word0.mWord + " " + word0.mFrequency);
|
System.out.println("Deleted: " + word0Property.mWord + " "
|
||||||
|
+ word0Property.getProbability());
|
||||||
hasDifferences = true;
|
hasDifferences = true;
|
||||||
} else {
|
} else {
|
||||||
// We found the word. Compare frequencies, shortcuts, bigrams
|
// We found the word. Compare frequencies, shortcuts, bigrams
|
||||||
if (word0.mFrequency != word1.getFrequency()) {
|
if (word0Property.getProbability() != word1PtNode.getFrequency()) {
|
||||||
System.out.println("Freq changed: " + word0.mWord + " " + word0.mFrequency
|
System.out.println("Probability changed: " + word0Property.mWord + " "
|
||||||
+ " -> " + word1.getFrequency());
|
+ word0Property.getProbability() + " -> " + word1PtNode.getFrequency());
|
||||||
hasDifferences = true;
|
hasDifferences = true;
|
||||||
}
|
}
|
||||||
if (word0.mIsNotAWord != word1.getIsNotAWord()) {
|
if (word0Property.mIsNotAWord != word1PtNode.getIsNotAWord()) {
|
||||||
System.out.println("Not a word: " + word0.mWord + " " + word0.mIsNotAWord
|
System.out.println("Not a word: " + word0Property.mWord + " "
|
||||||
+ " -> " + word1.getIsNotAWord());
|
+ word0Property.mIsNotAWord + " -> " + word1PtNode.getIsNotAWord());
|
||||||
hasDifferences = true;
|
hasDifferences = true;
|
||||||
}
|
}
|
||||||
if (word0.mIsBlacklistEntry != word1.getIsBlacklistEntry()) {
|
if (word0Property.mIsBlacklistEntry != word1PtNode.getIsBlacklistEntry()) {
|
||||||
System.out.println("Blacklist: " + word0.mWord + " " + word0.mIsBlacklistEntry
|
System.out.println("Blacklist: " + word0Property.mWord + " "
|
||||||
+ " -> " + word1.getIsBlacklistEntry());
|
+ word0Property.mIsBlacklistEntry + " -> "
|
||||||
|
+ word1PtNode.getIsBlacklistEntry());
|
||||||
hasDifferences = true;
|
hasDifferences = true;
|
||||||
}
|
}
|
||||||
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord,
|
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
|
||||||
"Bigram", word0.mBigrams, word1.getBigrams());
|
"Bigram", word0Property.mBigrams, word1PtNode.getBigrams());
|
||||||
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0.mWord,
|
hasDifferences |= hasAttributesDifferencesAndPrintThemIfAny(word0Property.mWord,
|
||||||
"Shortcut", word0.mShortcutTargets, word1.getShortcutTargets());
|
"Shortcut", word0Property.mShortcutTargets,
|
||||||
|
word1PtNode.getShortcutTargets());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for (final Word word1 : dict1) {
|
for (final WordProperty word1Property : dict1) {
|
||||||
final PtNode word0 = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
|
final PtNode word0PtNode = FusionDictionary.findWordInTree(dict0.mRootNodeArray,
|
||||||
word1.mWord);
|
word1Property.mWord);
|
||||||
if (null == word0) {
|
if (null == word0PtNode) {
|
||||||
// This word is not in dict0
|
// This word is not in dict0
|
||||||
System.out.println("Added: " + word1.mWord + " " + word1.mFrequency);
|
System.out.println("Added: " + word1Property.mWord + " "
|
||||||
|
+ word1Property.getProbability());
|
||||||
hasDifferences = true;
|
hasDifferences = true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
import com.android.inputmethod.latin.makedict.Word;
|
import com.android.inputmethod.latin.makedict.WordProperty;
|
||||||
|
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -43,14 +43,14 @@ public class Info extends Dicttool.Command {
|
||||||
int bigramCount = 0;
|
int bigramCount = 0;
|
||||||
int shortcutCount = 0;
|
int shortcutCount = 0;
|
||||||
int whitelistCount = 0;
|
int whitelistCount = 0;
|
||||||
for (final Word w : dict) {
|
for (final WordProperty wordProperty : dict) {
|
||||||
++wordCount;
|
++wordCount;
|
||||||
if (null != w.mBigrams) {
|
if (null != wordProperty.mBigrams) {
|
||||||
bigramCount += w.mBigrams.size();
|
bigramCount += wordProperty.mBigrams.size();
|
||||||
}
|
}
|
||||||
if (null != w.mShortcutTargets) {
|
if (null != wordProperty.mShortcutTargets) {
|
||||||
shortcutCount += w.mShortcutTargets.size();
|
shortcutCount += wordProperty.mShortcutTargets.size();
|
||||||
for (WeightedString shortcutTarget : w.mShortcutTargets) {
|
for (WeightedString shortcutTarget : wordProperty.mShortcutTargets) {
|
||||||
if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
|
if (FormatSpec.SHORTCUT_WHITELIST_FREQUENCY
|
||||||
== shortcutTarget.getProbability()) {
|
== shortcutTarget.getProbability()) {
|
||||||
++whitelistCount;
|
++whitelistCount;
|
||||||
|
|
|
@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
import com.android.inputmethod.latin.makedict.Word;
|
import com.android.inputmethod.latin.makedict.WordProperty;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
|
@ -52,7 +52,7 @@ public class XmlDictInputOutput {
|
||||||
private static final String WORD_TAG = "w";
|
private static final String WORD_TAG = "w";
|
||||||
private static final String BIGRAM_TAG = "bigram";
|
private static final String BIGRAM_TAG = "bigram";
|
||||||
private static final String SHORTCUT_TAG = "shortcut";
|
private static final String SHORTCUT_TAG = "shortcut";
|
||||||
private static final String FREQUENCY_ATTR = "f";
|
private static final String PROBABILITY_ATTR = "f";
|
||||||
private static final String WORD_ATTR = "word";
|
private static final String WORD_ATTR = "word";
|
||||||
private static final String NOT_A_WORD_ATTR = "not_a_word";
|
private static final String NOT_A_WORD_ATTR = "not_a_word";
|
||||||
|
|
||||||
|
@ -107,7 +107,7 @@ public class XmlDictInputOutput {
|
||||||
mWord = "";
|
mWord = "";
|
||||||
for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
|
for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
|
||||||
final String attrName = attrs.getLocalName(attrIndex);
|
final String attrName = attrs.getLocalName(attrIndex);
|
||||||
if (FREQUENCY_ATTR.equals(attrName)) {
|
if (PROBABILITY_ATTR.equals(attrName)) {
|
||||||
mFreq = Integer.parseInt(attrs.getValue(attrIndex));
|
mFreq = Integer.parseInt(attrs.getValue(attrIndex));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -348,9 +348,9 @@ public class XmlDictInputOutput {
|
||||||
*/
|
*/
|
||||||
public static void writeDictionaryXml(Writer destination, FusionDictionary dict)
|
public static void writeDictionaryXml(Writer destination, FusionDictionary dict)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final TreeSet<Word> set = new TreeSet<Word>();
|
final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<WordProperty>();
|
||||||
for (Word word : dict) {
|
for (WordProperty wordProperty : dict) {
|
||||||
set.add(word);
|
wordPropertiesInDict.add(wordProperty);
|
||||||
}
|
}
|
||||||
// TODO: use an XMLSerializer if this gets big
|
// TODO: use an XMLSerializer if this gets big
|
||||||
destination.write("<wordlist format=\"2\"");
|
destination.write("<wordlist format=\"2\"");
|
||||||
|
@ -361,23 +361,24 @@ public class XmlDictInputOutput {
|
||||||
}
|
}
|
||||||
destination.write(">\n");
|
destination.write(">\n");
|
||||||
destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
|
destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
|
||||||
for (Word word : set) {
|
for (WordProperty wordProperty : wordPropertiesInDict) {
|
||||||
destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
|
destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + wordProperty.mWord
|
||||||
+ FREQUENCY_ATTR + "=\"" + word.mFrequency
|
+ "\" " + PROBABILITY_ATTR + "=\"" + wordProperty.getProbability()
|
||||||
+ (word.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "") + "\">");
|
+ (wordProperty.mIsNotAWord ? "\" " + NOT_A_WORD_ATTR + "=\"true" : "")
|
||||||
if (null != word.mShortcutTargets) {
|
+ "\">");
|
||||||
|
if (null != wordProperty.mShortcutTargets) {
|
||||||
destination.write("\n");
|
destination.write("\n");
|
||||||
for (WeightedString target : word.mShortcutTargets) {
|
for (WeightedString target : wordProperty.mShortcutTargets) {
|
||||||
destination.write(" <" + SHORTCUT_TAG + " " + FREQUENCY_ATTR + "=\""
|
destination.write(" <" + SHORTCUT_TAG + " " + PROBABILITY_ATTR + "=\""
|
||||||
+ target.getProbability() + "\">" + target.mWord + "</" + SHORTCUT_TAG
|
+ target.getProbability() + "\">" + target.mWord + "</" + SHORTCUT_TAG
|
||||||
+ ">\n");
|
+ ">\n");
|
||||||
}
|
}
|
||||||
destination.write(" ");
|
destination.write(" ");
|
||||||
}
|
}
|
||||||
if (null != word.mBigrams) {
|
if (null != wordProperty.mBigrams) {
|
||||||
destination.write("\n");
|
destination.write("\n");
|
||||||
for (WeightedString bigram : word.mBigrams) {
|
for (WeightedString bigram : wordProperty.mBigrams) {
|
||||||
destination.write(" <" + BIGRAM_TAG + " " + FREQUENCY_ATTR + "=\""
|
destination.write(" <" + BIGRAM_TAG + " " + PROBABILITY_ATTR + "=\""
|
||||||
+ bigram.getProbability() + "\">" + bigram.mWord
|
+ bigram.getProbability() + "\">" + bigram.mWord
|
||||||
+ "</" + BIGRAM_TAG + ">\n");
|
+ "</" + BIGRAM_TAG + ">\n");
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.Word;
|
import com.android.inputmethod.latin.makedict.WordProperty;
|
||||||
|
|
||||||
import junit.framework.TestCase;
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
@ -87,8 +87,8 @@ public class FusionDictionaryTest extends TestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void dumpDict(final FusionDictionary dict) {
|
private void dumpDict(final FusionDictionary dict) {
|
||||||
for (Word w : dict) {
|
for (WordProperty wordProperty : dict) {
|
||||||
System.out.println("Word " + dumpWord(w.mWord));
|
System.out.println("Word " + dumpWord(wordProperty.mWord));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue