Remove the shortcutOnly attribute which is now useless.
Change-Id: Ifccdfdaf7c0066bb7728981503baceff0fedb71f
main
parent
fb64d0cd03
commit
8cf1a8d04f
|
@ -1172,11 +1172,10 @@ public class BinaryDictInputOutput {
|
||||||
}
|
}
|
||||||
nodeContents.add(
|
nodeContents.add(
|
||||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
|
new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
|
||||||
children, false));
|
children));
|
||||||
} else {
|
} else {
|
||||||
nodeContents.add(
|
nodeContents.add(
|
||||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency,
|
new CharGroup(info.mCharacters, shortcutTargets, bigrams, info.mFrequency));
|
||||||
false));
|
|
||||||
}
|
}
|
||||||
groupOffset = info.mEndAddress;
|
groupOffset = info.mEndAddress;
|
||||||
}
|
}
|
||||||
|
|
|
@ -98,35 +98,24 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
ArrayList<WeightedString> mShortcutTargets;
|
ArrayList<WeightedString> mShortcutTargets;
|
||||||
ArrayList<WeightedString> mBigrams;
|
ArrayList<WeightedString> mBigrams;
|
||||||
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
|
int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal.
|
||||||
boolean mIsShortcutOnly; // Only valid if this is a terminal.
|
|
||||||
Node mChildren;
|
Node mChildren;
|
||||||
// The two following members to help with binary generation
|
// The two following members to help with binary generation
|
||||||
int mCachedSize;
|
int mCachedSize;
|
||||||
int mCachedAddress;
|
int mCachedAddress;
|
||||||
|
|
||||||
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
|
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
|
||||||
final ArrayList<WeightedString> bigrams, final int frequency,
|
final ArrayList<WeightedString> bigrams, final int frequency) {
|
||||||
final boolean isShortcutOnly) {
|
|
||||||
mChars = chars;
|
mChars = chars;
|
||||||
mFrequency = frequency;
|
mFrequency = frequency;
|
||||||
mIsShortcutOnly = isShortcutOnly;
|
|
||||||
if (mIsShortcutOnly && NOT_A_TERMINAL == mFrequency) {
|
|
||||||
throw new RuntimeException("A node must be a terminal to be a shortcut only");
|
|
||||||
}
|
|
||||||
mShortcutTargets = shortcutTargets;
|
mShortcutTargets = shortcutTargets;
|
||||||
mBigrams = bigrams;
|
mBigrams = bigrams;
|
||||||
mChildren = null;
|
mChildren = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
|
public CharGroup(final int[] chars, final ArrayList<WeightedString> shortcutTargets,
|
||||||
final ArrayList<WeightedString> bigrams, final int frequency, final Node children,
|
final ArrayList<WeightedString> bigrams, final int frequency, final Node children) {
|
||||||
final boolean isShortcutOnly) {
|
|
||||||
mChars = chars;
|
mChars = chars;
|
||||||
mFrequency = frequency;
|
mFrequency = frequency;
|
||||||
mIsShortcutOnly = isShortcutOnly;
|
|
||||||
if (mIsShortcutOnly && NOT_A_TERMINAL == mFrequency) {
|
|
||||||
throw new RuntimeException("A node must be a terminal to be a shortcut only");
|
|
||||||
}
|
|
||||||
mShortcutTargets = shortcutTargets;
|
mShortcutTargets = shortcutTargets;
|
||||||
mBigrams = bigrams;
|
mBigrams = bigrams;
|
||||||
mChildren = children;
|
mChildren = children;
|
||||||
|
@ -205,7 +194,7 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
* updated if they are higher than the existing ones.
|
* updated if they are higher than the existing ones.
|
||||||
*/
|
*/
|
||||||
public void update(int frequency, ArrayList<WeightedString> shortcutTargets,
|
public void update(int frequency, ArrayList<WeightedString> shortcutTargets,
|
||||||
ArrayList<WeightedString> bigrams, boolean isShortcutOnly) {
|
ArrayList<WeightedString> bigrams) {
|
||||||
if (frequency > mFrequency) {
|
if (frequency > mFrequency) {
|
||||||
mFrequency = frequency;
|
mFrequency = frequency;
|
||||||
}
|
}
|
||||||
|
@ -241,7 +230,6 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mIsShortcutOnly = isShortcutOnly;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -304,7 +292,7 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
for (WeightedString word : words) {
|
for (WeightedString word : words) {
|
||||||
final CharGroup t = findWordInTree(mRoot, word.mWord);
|
final CharGroup t = findWordInTree(mRoot, word.mWord);
|
||||||
if (null == t) {
|
if (null == t) {
|
||||||
add(getCodePoints(word.mWord), 0, null, null, false /* isShortcutOnly */);
|
add(getCodePoints(word.mWord), 0, null, null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -328,7 +316,7 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
if (null != bigrams) {
|
if (null != bigrams) {
|
||||||
addNeutralWords(bigrams);
|
addNeutralWords(bigrams);
|
||||||
}
|
}
|
||||||
add(getCodePoints(word), frequency, shortcutTargets, bigrams, false /* isShortcutOnly */);
|
add(getCodePoints(word), frequency, shortcutTargets, bigrams);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -349,21 +337,6 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Helper method to add a shortcut that should not be a dictionary word.
|
|
||||||
*
|
|
||||||
* @param word the word to add.
|
|
||||||
* @param frequency the frequency of the word, in the range [0..255].
|
|
||||||
* @param shortcutTargets a list of shortcut targets. May not be null.
|
|
||||||
*/
|
|
||||||
public void addShortcutOnly(final String word, final int frequency,
|
|
||||||
final ArrayList<WeightedString> shortcutTargets) {
|
|
||||||
if (null == shortcutTargets) {
|
|
||||||
throw new RuntimeException("Can't add a shortcut without targets");
|
|
||||||
}
|
|
||||||
add(getCodePoints(word), frequency, shortcutTargets, null, true /* isShortcutOnly */);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper method to add a new bigram to the dictionary.
|
* Helper method to add a new bigram to the dictionary.
|
||||||
*
|
*
|
||||||
|
@ -377,7 +350,7 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
final CharGroup charGroup2 = findWordInTree(mRoot, word2);
|
final CharGroup charGroup2 = findWordInTree(mRoot, word2);
|
||||||
if (charGroup2 == null) {
|
if (charGroup2 == null) {
|
||||||
// TODO: refactor with the identical code in addNeutralWords
|
// TODO: refactor with the identical code in addNeutralWords
|
||||||
add(getCodePoints(word2), 0, null, null, false /* isShortcutOnly */);
|
add(getCodePoints(word2), 0, null, null);
|
||||||
}
|
}
|
||||||
charGroup.addBigram(word2, frequency);
|
charGroup.addBigram(word2, frequency);
|
||||||
} else {
|
} else {
|
||||||
|
@ -395,12 +368,10 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
* @param frequency the frequency of the word, in the range [0..255].
|
* @param frequency the frequency of the word, in the range [0..255].
|
||||||
* @param shortcutTargets an optional list of shortcut targets for this word (null if none).
|
* @param shortcutTargets an optional list of shortcut targets for this word (null if none).
|
||||||
* @param bigrams an optional list of bigrams for this word (null if none).
|
* @param bigrams an optional list of bigrams for this word (null if none).
|
||||||
* @param isShortcutOnly whether this should be a shortcut only.
|
|
||||||
*/
|
*/
|
||||||
private void add(final int[] word, final int frequency,
|
private void add(final int[] word, final int frequency,
|
||||||
final ArrayList<WeightedString> shortcutTargets,
|
final ArrayList<WeightedString> shortcutTargets,
|
||||||
final ArrayList<WeightedString> bigrams,
|
final ArrayList<WeightedString> bigrams) {
|
||||||
final boolean isShortcutOnly) {
|
|
||||||
assert(frequency >= 0 && frequency <= 255);
|
assert(frequency >= 0 && frequency <= 255);
|
||||||
Node currentNode = mRoot;
|
Node currentNode = mRoot;
|
||||||
int charIndex = 0;
|
int charIndex = 0;
|
||||||
|
@ -425,7 +396,7 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
|
final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
|
||||||
final CharGroup newGroup = new CharGroup(
|
final CharGroup newGroup = new CharGroup(
|
||||||
Arrays.copyOfRange(word, charIndex, word.length),
|
Arrays.copyOfRange(word, charIndex, word.length),
|
||||||
shortcutTargets, bigrams, frequency, isShortcutOnly);
|
shortcutTargets, bigrams, frequency);
|
||||||
currentNode.mData.add(insertionIndex, newGroup);
|
currentNode.mData.add(insertionIndex, newGroup);
|
||||||
checkStack(currentNode);
|
checkStack(currentNode);
|
||||||
} else {
|
} else {
|
||||||
|
@ -435,13 +406,13 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
// The new word is a prefix of an existing word, but the node on which it
|
// The new word is a prefix of an existing word, but the node on which it
|
||||||
// should end already exists as is. Since the old CharNode was not a terminal,
|
// should end already exists as is. Since the old CharNode was not a terminal,
|
||||||
// make it one by filling in its frequency and other attributes
|
// make it one by filling in its frequency and other attributes
|
||||||
currentGroup.update(frequency, shortcutTargets, bigrams, isShortcutOnly);
|
currentGroup.update(frequency, shortcutTargets, bigrams);
|
||||||
} else {
|
} else {
|
||||||
// The new word matches the full old word and extends past it.
|
// The new word matches the full old word and extends past it.
|
||||||
// We only have to create a new node and add it to the end of this.
|
// We only have to create a new node and add it to the end of this.
|
||||||
final CharGroup newNode = new CharGroup(
|
final CharGroup newNode = new CharGroup(
|
||||||
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
|
Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
|
||||||
shortcutTargets, bigrams, frequency, isShortcutOnly);
|
shortcutTargets, bigrams, frequency);
|
||||||
currentGroup.mChildren = new Node();
|
currentGroup.mChildren = new Node();
|
||||||
currentGroup.mChildren.mData.add(newNode);
|
currentGroup.mChildren.mData.add(newNode);
|
||||||
}
|
}
|
||||||
|
@ -449,7 +420,7 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
if (0 == differentCharIndex) {
|
if (0 == differentCharIndex) {
|
||||||
// Exact same word. Update the frequency if higher. This will also add the
|
// Exact same word. Update the frequency if higher. This will also add the
|
||||||
// new bigrams to the existing bigram list if it already exists.
|
// new bigrams to the existing bigram list if it already exists.
|
||||||
currentGroup.update(frequency, shortcutTargets, bigrams, isShortcutOnly);
|
currentGroup.update(frequency, shortcutTargets, bigrams);
|
||||||
} else {
|
} else {
|
||||||
// Partial prefix match only. We have to replace the current node with a node
|
// Partial prefix match only. We have to replace the current node with a node
|
||||||
// containing the current prefix and create two new ones for the tails.
|
// containing the current prefix and create two new ones for the tails.
|
||||||
|
@ -457,26 +428,21 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
final CharGroup newOldWord = new CharGroup(
|
final CharGroup newOldWord = new CharGroup(
|
||||||
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
|
Arrays.copyOfRange(currentGroup.mChars, differentCharIndex,
|
||||||
currentGroup.mChars.length), currentGroup.mShortcutTargets,
|
currentGroup.mChars.length), currentGroup.mShortcutTargets,
|
||||||
currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren,
|
currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren);
|
||||||
currentGroup.mIsShortcutOnly);
|
|
||||||
newChildren.mData.add(newOldWord);
|
newChildren.mData.add(newOldWord);
|
||||||
|
|
||||||
final CharGroup newParent;
|
final CharGroup newParent;
|
||||||
if (charIndex + differentCharIndex >= word.length) {
|
if (charIndex + differentCharIndex >= word.length) {
|
||||||
newParent = new CharGroup(
|
newParent = new CharGroup(
|
||||||
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
|
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
|
||||||
shortcutTargets, bigrams, frequency, newChildren, isShortcutOnly);
|
shortcutTargets, bigrams, frequency, newChildren);
|
||||||
} else {
|
} else {
|
||||||
// isShortcutOnly makes no sense for non-terminal nodes. The following node
|
|
||||||
// is non-terminal (frequency 0 in FusionDictionary representation) so we
|
|
||||||
// pass false for isShortcutOnly
|
|
||||||
newParent = new CharGroup(
|
newParent = new CharGroup(
|
||||||
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
|
Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
|
||||||
null, null, -1, newChildren, false /* isShortcutOnly */);
|
null, null, -1, newChildren);
|
||||||
final CharGroup newWord = new CharGroup(
|
final CharGroup newWord = new CharGroup(
|
||||||
Arrays.copyOfRange(word, charIndex + differentCharIndex,
|
Arrays.copyOfRange(word, charIndex + differentCharIndex,
|
||||||
word.length), shortcutTargets, bigrams, frequency,
|
word.length), shortcutTargets, bigrams, frequency);
|
||||||
isShortcutOnly);
|
|
||||||
final int addIndex = word[charIndex + differentCharIndex]
|
final int addIndex = word[charIndex + differentCharIndex]
|
||||||
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
|
> currentGroup.mChars[differentCharIndex] ? 1 : 0;
|
||||||
newChildren.mData.add(addIndex, newWord);
|
newChildren.mData.add(addIndex, newWord);
|
||||||
|
@ -534,8 +500,7 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
*/
|
*/
|
||||||
private static int findInsertionIndex(final Node node, int character) {
|
private static int findInsertionIndex(final Node node, int character) {
|
||||||
final ArrayList<CharGroup> data = node.mData;
|
final ArrayList<CharGroup> data = node.mData;
|
||||||
final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0,
|
final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0);
|
||||||
false /* isShortcutOnly */);
|
|
||||||
int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
|
int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR);
|
||||||
return result >= 0 ? result : -result - 1;
|
return result >= 0 ? result : -result - 1;
|
||||||
}
|
}
|
||||||
|
@ -763,8 +728,7 @@ public class FusionDictionary implements Iterable<Word> {
|
||||||
}
|
}
|
||||||
if (currentGroup.mFrequency >= 0)
|
if (currentGroup.mFrequency >= 0)
|
||||||
return new Word(mCurrentString.toString(), currentGroup.mFrequency,
|
return new Word(mCurrentString.toString(), currentGroup.mFrequency,
|
||||||
currentGroup.mShortcutTargets, currentGroup.mBigrams,
|
currentGroup.mShortcutTargets, currentGroup.mBigrams);
|
||||||
currentGroup.mIsShortcutOnly);
|
|
||||||
} else {
|
} else {
|
||||||
mPositions.removeLast();
|
mPositions.removeLast();
|
||||||
currentPos = mPositions.getLast();
|
currentPos = mPositions.getLast();
|
||||||
|
|
|
@ -29,7 +29,6 @@ import java.util.Arrays;
|
||||||
public class Word implements Comparable<Word> {
|
public class Word implements Comparable<Word> {
|
||||||
final String mWord;
|
final String mWord;
|
||||||
final int mFrequency;
|
final int mFrequency;
|
||||||
final boolean mIsShortcutOnly;
|
|
||||||
final ArrayList<WeightedString> mShortcutTargets;
|
final ArrayList<WeightedString> mShortcutTargets;
|
||||||
final ArrayList<WeightedString> mBigrams;
|
final ArrayList<WeightedString> mBigrams;
|
||||||
|
|
||||||
|
@ -37,19 +36,17 @@ public class Word implements Comparable<Word> {
|
||||||
|
|
||||||
public Word(final String word, final int frequency,
|
public Word(final String word, final int frequency,
|
||||||
final ArrayList<WeightedString> shortcutTargets,
|
final ArrayList<WeightedString> shortcutTargets,
|
||||||
final ArrayList<WeightedString> bigrams, final boolean isShortcutOnly) {
|
final ArrayList<WeightedString> bigrams) {
|
||||||
mWord = word;
|
mWord = word;
|
||||||
mFrequency = frequency;
|
mFrequency = frequency;
|
||||||
mShortcutTargets = shortcutTargets;
|
mShortcutTargets = shortcutTargets;
|
||||||
mBigrams = bigrams;
|
mBigrams = bigrams;
|
||||||
mIsShortcutOnly = isShortcutOnly;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private static int computeHashCode(Word word) {
|
private static int computeHashCode(Word word) {
|
||||||
return Arrays.hashCode(new Object[] {
|
return Arrays.hashCode(new Object[] {
|
||||||
word.mWord,
|
word.mWord,
|
||||||
word.mFrequency,
|
word.mFrequency,
|
||||||
word.mIsShortcutOnly,
|
|
||||||
word.mShortcutTargets.hashCode(),
|
word.mShortcutTargets.hashCode(),
|
||||||
word.mBigrams.hashCode()
|
word.mBigrams.hashCode()
|
||||||
});
|
});
|
||||||
|
@ -80,7 +77,6 @@ public class Word implements Comparable<Word> {
|
||||||
if (!(o instanceof Word)) return false;
|
if (!(o instanceof Word)) return false;
|
||||||
Word w = (Word)o;
|
Word w = (Word)o;
|
||||||
return mFrequency == w.mFrequency && mWord.equals(w.mWord)
|
return mFrequency == w.mFrequency && mWord.equals(w.mWord)
|
||||||
&& mIsShortcutOnly == w.mIsShortcutOnly
|
|
||||||
&& mShortcutTargets.equals(w.mShortcutTargets)
|
&& mShortcutTargets.equals(w.mShortcutTargets)
|
||||||
&& mBigrams.equals(w.mBigrams);
|
&& mBigrams.equals(w.mBigrams);
|
||||||
}
|
}
|
||||||
|
|
|
@ -46,7 +46,6 @@ public class XmlDictInputOutput {
|
||||||
private static final String SHORTCUT_TAG = "shortcut";
|
private static final String SHORTCUT_TAG = "shortcut";
|
||||||
private static final String FREQUENCY_ATTR = "f";
|
private static final String FREQUENCY_ATTR = "f";
|
||||||
private static final String WORD_ATTR = "word";
|
private static final String WORD_ATTR = "word";
|
||||||
private static final String SHORTCUT_ONLY_ATTR = "shortcutOnly";
|
|
||||||
|
|
||||||
private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1;
|
private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1;
|
||||||
|
|
||||||
|
@ -241,15 +240,6 @@ public class XmlDictInputOutput {
|
||||||
new UnigramHandler(dict, shortcutHandler.getShortcutMap(),
|
new UnigramHandler(dict, shortcutHandler.getShortcutMap(),
|
||||||
bigramHandler.getBigramMap());
|
bigramHandler.getBigramMap());
|
||||||
parser.parse(unigrams, unigramHandler);
|
parser.parse(unigrams, unigramHandler);
|
||||||
|
|
||||||
final HashMap<String, ArrayList<WeightedString>> shortcutMap =
|
|
||||||
shortcutHandler.getShortcutMap();
|
|
||||||
for (final String shortcut : shortcutMap.keySet()) {
|
|
||||||
if (dict.hasWord(shortcut)) continue;
|
|
||||||
// TODO: list a frequency in the shortcut file and use it here, instead of
|
|
||||||
// a constant freq
|
|
||||||
dict.addShortcutOnly(shortcut, SHORTCUT_ONLY_DEFAULT_FREQ, shortcutMap.get(shortcut));
|
|
||||||
}
|
|
||||||
return dict;
|
return dict;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -291,8 +281,7 @@ public class XmlDictInputOutput {
|
||||||
destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
|
destination.write("<!-- Warning: there is no code to read this format yet. -->\n");
|
||||||
for (Word word : set) {
|
for (Word word : set) {
|
||||||
destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
|
destination.write(" <" + WORD_TAG + " " + WORD_ATTR + "=\"" + word.mWord + "\" "
|
||||||
+ FREQUENCY_ATTR + "=\"" + word.mFrequency + "\" " + SHORTCUT_ONLY_ATTR
|
+ FREQUENCY_ATTR + "=\"" + word.mFrequency + "\">");
|
||||||
+ "=\"" + word.mIsShortcutOnly + "\">");
|
|
||||||
if (null != word.mShortcutTargets) {
|
if (null != word.mShortcutTargets) {
|
||||||
destination.write("\n");
|
destination.write("\n");
|
||||||
for (WeightedString target : word.mShortcutTargets) {
|
for (WeightedString target : word.mShortcutTargets) {
|
||||||
|
|
Loading…
Reference in New Issue