Remove the shortcutOnly attribute, which is now useless.

Change-Id: Ifccdfdaf7c0066bb7728981503baceff0fedb71f
main
Jean Chalard 2012-03-27 21:36:52 +09:00
parent fb64d0cd03
commit 8cf1a8d04f
4 changed files with 21 additions and 73 deletions

View File

@ -1172,11 +1172,10 @@ public class BinaryDictInputOutput {
} }
nodeContents.add( nodeContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency, new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
children, false)); children));
} else { } else {
nodeContents.add( nodeContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency, new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency));
false));
} }
groupOffset = info.mEndAddress; groupOffset = info.mEndAddress;
} }

View File

@ -98,35 +98,24 @@ public class FusionDictionary implements Iterable<Word> {
ArrayList<WeightedString> mShortcutTargets; ArrayList<WeightedString> mShortcutTargets;
ArrayList<WeightedString> mBigrams; ArrayList<WeightedString> mBigrams;
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal. int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
boolean mIsShortcutOnly; // Only valid if this is a terminal.
Node mChildren; Node mChildren;
// The two following members to help with binary generation // The two following members to help with binary generation
int mCachedSize; int mCachedSize;
int mCachedAddress; int mCachedAddress;
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final int frequency, final ArrayList<WeightedString> bigrams, final int frequency) {
final boolean isShortcutOnly) {
mChars = chars; mChars = chars;
mFrequency = frequency; mFrequency = frequency;
mIsShortcutOnly = isShortcutOnly;
if (mIsShortcutOnly && NOT_A_TERMINAL == mFrequency) {
throw new RuntimeException("A node must be a terminal to be a shortcut only");
}
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
mBigrams = bigrams; mBigrams = bigrams;
mChildren = null; mChildren = null;
} }
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets, public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final int frequency, final Node children, final ArrayList<WeightedString> bigrams, final int frequency, final Node children) {
final boolean isShortcutOnly) {
mChars = chars; mChars = chars;
mFrequency = frequency; mFrequency = frequency;
mIsShortcutOnly = isShortcutOnly;
if (mIsShortcutOnly && NOT_A_TERMINAL == mFrequency) {
throw new RuntimeException("A node must be a terminal to be a shortcut only");
}
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
mBigrams = bigrams; mBigrams = bigrams;
mChildren = children; mChildren = children;
@ -205,7 +194,7 @@ public class FusionDictionary implements Iterable<Word> {
* updated if they are higher than the existing ones. * updated if they are higher than the existing ones.
*/ */
public void update(int frequency, ArrayList<WeightedString> shortcutTargets, public void update(int frequency, ArrayList<WeightedString> shortcutTargets,
ArrayList<WeightedString> bigrams, boolean isShortcutOnly) { ArrayList<WeightedString> bigrams) {
if (frequency > mFrequency) { if (frequency > mFrequency) {
mFrequency = frequency; mFrequency = frequency;
} }
@ -241,7 +230,6 @@ public class FusionDictionary implements Iterable<Word> {
} }
} }
} }
mIsShortcutOnly = isShortcutOnly;
} }
} }
@ -304,7 +292,7 @@ public class FusionDictionary implements Iterable<Word> {
for (WeightedString word : words) { for (WeightedString word : words) {
final CharGroup t = findWordInTree(mRoot, word.mWord); final CharGroup t = findWordInTree(mRoot, word.mWord);
if (null == t) { if (null == t) {
add(getCodePoints(word.mWord), 0, null, null, false /* isShortcutOnly */); add(getCodePoints(word.mWord), 0, null, null);
} }
} }
} }
@ -328,7 +316,7 @@ public class FusionDictionary implements Iterable<Word> {
if (null != bigrams) { if (null != bigrams) {
addNeutralWords(bigrams); addNeutralWords(bigrams);
} }
add(getCodePoints(word), frequency, shortcutTargets, bigrams, false /* isShortcutOnly */); add(getCodePoints(word), frequency, shortcutTargets, bigrams);
} }
/** /**
@ -349,21 +337,6 @@ public class FusionDictionary implements Iterable<Word> {
} }
} }
/**
* Helper method to add a shortcut that should not be a dictionary word.
*
* @param word the word to add.
* @param frequency the frequency of the word, in the range [0..255].
* @param shortcutTargets a list of shortcut targets. May not be null.
*/
public void addShortcutOnly(final String word, final int frequency,
final ArrayList<WeightedString> shortcutTargets) {
if (null == shortcutTargets) {
throw new RuntimeException("Can't add a shortcut without targets");
}
add(getCodePoints(word), frequency, shortcutTargets, null, true /* isShortcutOnly */);
}
/** /**
* Helper method to add a new bigram to the dictionary. * Helper method to add a new bigram to the dictionary.
* *
@ -377,7 +350,7 @@ public class FusionDictionary implements Iterable<Word> {
final CharGroup charGroup2 = findWordInTree(mRoot, word2); final CharGroup charGroup2 = findWordInTree(mRoot, word2);
if (charGroup2 == null) { if (charGroup2 == null) {
// TODO: refactor with the identical code in addNeutralWords // TODO: refactor with the identical code in addNeutralWords
add(getCodePoints(word2), 0, null, null, false /* isShortcutOnly */); add(getCodePoints(word2), 0, null, null);
} }
charGroup.addBigram(word2, frequency); charGroup.addBigram(word2, frequency);
} else { } else {
@ -395,12 +368,10 @@ public class FusionDictionary implements Iterable<Word> {
* @param frequency the frequency of the word, in the range [0..255]. * @param frequency the frequency of the word, in the range [0..255].
* @param shortcutTargets an optional list of shortcut targets for this word (null if none). * @param shortcutTargets an optional list of shortcut targets for this word (null if none).
* @param bigrams an optional list of bigrams for this word (null if none). * @param bigrams an optional list of bigrams for this word (null if none).
* @param isShortcutOnly whether this should be a shortcut only.
*/ */
private void add(final int[] word, final int frequency, private void add(final int[] word, final int frequency,
final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final ArrayList<WeightedString> bigrams) {
final boolean isShortcutOnly) {
assert(frequency >= 0 && frequency <= 255); assert(frequency >= 0 && frequency <= 255);
Node currentNode = mRoot; Node currentNode = mRoot;
int charIndex = 0; int charIndex = 0;
@ -425,7 +396,7 @@ public class FusionDictionary implements Iterable<Word> {
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]); final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
final CharGroup newGroup = new CharGroup( final CharGroup newGroup = new CharGroup(
Arrays.copyOfRange(word, charIndex, word.length), Arrays.copyOfRange(word, charIndex, word.length),
shortcutTargets, bigrams, frequency, isShortcutOnly); shortcutTargets, bigrams, frequency);
currentNode.mData.add(insertionIndex, newGroup); currentNode.mData.add(insertionIndex, newGroup);
checkStack(currentNode); checkStack(currentNode);
} else { } else {
@ -435,13 +406,13 @@ public class FusionDictionary implements Iterable<Word> {
// The new word is a prefix of an existing word, but the node on which it // The new word is a prefix of an existing word, but the node on which it
// should end already exists as is. Since the old CharNode was not a terminal, // should end already exists as is. Since the old CharNode was not a terminal,
// make it one by filling in its frequency and other attributes // make it one by filling in its frequency and other attributes
currentGroup.update(frequency, shortcutTargets, bigrams, isShortcutOnly); currentGroup.update(frequency, shortcutTargets, bigrams);
} else { } else {
// The new word matches the full old word and extends past it. // The new word matches the full old word and extends past it.
// We only have to create a new node and add it to the end of this. // We only have to create a new node and add it to the end of this.
final CharGroup newNode = new CharGroup( final CharGroup newNode = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
shortcutTargets, bigrams, frequency, isShortcutOnly); shortcutTargets, bigrams, frequency);
currentGroup.mChildren = new Node(); currentGroup.mChildren = new Node();
currentGroup.mChildren.mData.add(newNode); currentGroup.mChildren.mData.add(newNode);
} }
@ -449,7 +420,7 @@ public class FusionDictionary implements Iterable<Word> {
if (0 == differentCharIndex) { if (0 == differentCharIndex) {
// Exact same word. Update the frequency if higher. This will also add the // Exact same word. Update the frequency if higher. This will also add the
// new bigrams to the existing bigram list if it already exists. // new bigrams to the existing bigram list if it already exists.
currentGroup.update(frequency, shortcutTargets, bigrams, isShortcutOnly); currentGroup.update(frequency, shortcutTargets, bigrams);
} else { } else {
// Partial prefix match only. We have to replace the current node with a node // Partial prefix match only. We have to replace the current node with a node
// containing the current prefix and create two new ones for the tails. // containing the current prefix and create two new ones for the tails.
@ -457,26 +428,21 @@ public class FusionDictionary implements Iterable<Word> {
final CharGroup newOldWord = new CharGroup( final CharGroup newOldWord = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex, Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
currentGroup.mChars.length), currentGroup.mShortcutTargets, currentGroup.mChars.length), currentGroup.mShortcutTargets,
currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren, currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren);
currentGroup.mIsShortcutOnly);
newChildren.mData.add(newOldWord); newChildren.mData.add(newOldWord);
final CharGroup newParent; final CharGroup newParent;
if (charIndex + differentCharIndex >= word.length) { if (charIndex + differentCharIndex >= word.length) {
newParent = new CharGroup( newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
shortcutTargets, bigrams, frequency, newChildren, isShortcutOnly); shortcutTargets, bigrams, frequency, newChildren);
} else { } else {
// isShortcutOnly makes no sense for non-terminal nodes. The following node
// is non-terminal (frequency 0 in FusionDictionary representation) so we
// pass false for isShortcutOnly
newParent = new CharGroup( newParent = new CharGroup(
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
null, null, -1, newChildren, false /* isShortcutOnly */); null, null, -1, newChildren);
final CharGroup newWord = new CharGroup( final CharGroup newWord = new CharGroup(
Arrays.copyOfRange(word, charIndex + differentCharIndex, Arrays.copyOfRange(word, charIndex + differentCharIndex,
word.length), shortcutTargets, bigrams, frequency, word.length), shortcutTargets, bigrams, frequency);
isShortcutOnly);
final int addIndex = word[charIndex + differentCharIndex] final int addIndex = word[charIndex + differentCharIndex]
> currentGroup.mChars[differentCharIndex] ? 1 : 0; > currentGroup.mChars[differentCharIndex] ? 1 : 0;
newChildren.mData.add(addIndex, newWord); newChildren.mData.add(addIndex, newWord);
@ -534,8 +500,7 @@ public class FusionDictionary implements Iterable<Word> {
*/ */
private static int findInsertionIndex(final Node node, int character) { private static int findInsertionIndex(final Node node, int character) {
final ArrayList<CharGroup> data = node.mData; final ArrayList<CharGroup> data = node.mData;
final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0, final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0);
false /* isShortcutOnly */);
int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR); int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
return result >= 0 ? result : -result - 1; return result >= 0 ? result : -result - 1;
} }
@ -763,8 +728,7 @@ public class FusionDictionary implements Iterable<Word> {
} }
if (currentGroup.mFrequency >= 0) if (currentGroup.mFrequency >= 0)
return new Word(mCurrentString.toString(), currentGroup.mFrequency, return new Word(mCurrentString.toString(), currentGroup.mFrequency,
currentGroup.mShortcutTargets, currentGroup.mBigrams, currentGroup.mShortcutTargets, currentGroup.mBigrams);
currentGroup.mIsShortcutOnly);
} else { } else {
mPositions.removeLast(); mPositions.removeLast();
currentPos = mPositions.getLast(); currentPos = mPositions.getLast();

View File

@ -29,7 +29,6 @@ import java.util.Arrays;
public class Word implements Comparable<Word> { public class Word implements Comparable<Word> {
final String mWord; final String mWord;
final int mFrequency; final int mFrequency;
final boolean mIsShortcutOnly;
final ArrayList<WeightedString> mShortcutTargets; final ArrayList<WeightedString> mShortcutTargets;
final ArrayList<WeightedString> mBigrams; final ArrayList<WeightedString> mBigrams;
@ -37,19 +36,17 @@ public class Word implements Comparable<Word> {
public Word(final String word, final int frequency, public Word(final String word, final int frequency,
final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<WeightedString> bigrams, final boolean isShortcutOnly) { final ArrayList<WeightedString> bigrams) {
mWord = word; mWord = word;
mFrequency = frequency; mFrequency = frequency;
mShortcutTargets = shortcutTargets; mShortcutTargets = shortcutTargets;
mBigrams = bigrams; mBigrams = bigrams;
mIsShortcutOnly = isShortcutOnly;
} }
private static int computeHashCode(Word word) { private static int computeHashCode(Word word) {
return Arrays.hashCode(new Object[] { return Arrays.hashCode(new Object[] {
word.mWord, word.mWord,
word.mFrequency, word.mFrequency,
word.mIsShortcutOnly,
word.mShortcutTargets.hashCode(), word.mShortcutTargets.hashCode(),
word.mBigrams.hashCode() word.mBigrams.hashCode()
}); });
@ -80,7 +77,6 @@ public class Word implements Comparable<Word> {
if (!(o instanceof Word)) return false; if (!(o instanceof Word)) return false;
Word w = (Word)o; Word w = (Word)o;
return mFrequency == w.mFrequency && mWord.equals(w.mWord) return mFrequency == w.mFrequency && mWord.equals(w.mWord)
&& mIsShortcutOnly == w.mIsShortcutOnly
&& mShortcutTargets.equals(w.mShortcutTargets) && mShortcutTargets.equals(w.mShortcutTargets)
&& mBigrams.equals(w.mBigrams); && mBigrams.equals(w.mBigrams);
} }

View File

@ -46,7 +46,6 @@ public class XmlDictInputOutput {
private static final String SHORTCUT_TAG = "shortcut"; private static final String SHORTCUT_TAG = "shortcut";
private static final String FREQUENCY_ATTR = "f"; private static final String FREQUENCY_ATTR = "f";
private static final String WORD_ATTR = "word"; private static final String WORD_ATTR = "word";
private static final String SHORTCUT_ONLY_ATTR = "shortcutOnly";
private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1; private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1;
@ -241,15 +240,6 @@ public class XmlDictInputOutput {
new UnigramHandler(dict, shortcutHandler.getShortcutMap(), new UnigramHandler(dict, shortcutHandler.getShortcutMap(),
bigramHandler.getBigramMap()); bigramHandler.getBigramMap());
parser.parse(unigrams, unigramHandler); parser.parse(unigrams, unigramHandler);
final HashMap<String, ArrayList<WeightedString>> shortcutMap =
shortcutHandler.getShortcutMap();
for (final String shortcut : shortcutMap.keySet()) {
if (dict.hasWord(shortcut)) continue;
// TODO: list a frequency in the shortcut file and use it here, instead of
// a constant freq
dict.addShortcutOnly(shortcut, SHORTCUT_ONLY_DEFAULT_FREQ, shortcutMap.get(shortcut));
}
return dict; return dict;
} }
@ -291,8 +281,7 @@ public class XmlDictInputOutput {
destination.write("<!-- Warning: there is no code to read this format yet. -->\n"); destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
for (Word word : set) { for (Word word : set) {
destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" " destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
+ FREQUENCY_ATTR + "=\"" + word.mFrequency + "\" " + SHORTCUT_ONLY_ATTR + FREQUENCY_ATTR + "=\"" + word.mFrequency + "\">");
+ "=\"" + word.mIsShortcutOnly + "\">");
if (null != word.mShortcutTargets) { if (null != word.mShortcutTargets) {
destination.write("\n"); destination.write("\n");
for (WeightedString target : word.mShortcutTargets) { for (WeightedString target : word.mShortcutTargets) {