Actually add shortcut-only entries.

Change-Id: I84bec8fb560cec2ad9cc857397a3f77a96b1d12d
2012-01-16 12:51:46 +09:00 · 2012-01-16 12:51:46 +09:00 · c599f2e9d6
parent d64b8c97fe
commit c599f2e9d6
3 changed files with 64 additions and 23 deletions
--- a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java
@ -606,7 +606,9 @@ public class BinaryDictInputOutput {
            }
            flags |= FLAG_HAS_BIGRAMS;
        }
-        // TODO: fill in the FLAG_IS_SHORTCUT_ONLY
+        if (group.mIsShortcutOnly) {
            flags |= FLAG_IS_SHORTCUT_ONLY;
        }
        return flags;
    }
--- a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java
@ -170,6 +170,24 @@ public class FusionDictionary implements Iterable<Word> {
        return array;
    }
    /**
     * Inserts every word of a list that is not in the dictionary yet as a 0-frequency entry.
     *
     * This is used for shortcut targets and bigrams that reference words the dictionary does
     * not contain yet. The same words may be added again later with an actual frequency; the
     * private version of add() takes care of that case.
     *
     * @param words the words to insert when missing. A null list is silently ignored.
     */
    private void addNeutralWords(final ArrayList<WeightedString> words) {
        if (null == words) {
            return;
        }
        for (final WeightedString candidate : words) {
            if (null != findWordInTree(mRoot, candidate.mWord)) {
                // Already present: leave the existing entry untouched.
                continue;
            }
            add(getCodePoints(candidate.mWord), 0, null, null, false /* isShortcutOnly */);
        }
    }
    /**
     * Helper method to add a word as a string.
     *
@ -186,22 +204,12 @@ public class FusionDictionary implements Iterable<Word> {
            final ArrayList<WeightedString> shortcutTargets,
            final ArrayList<WeightedString> bigrams) {
        if (null != shortcutTargets) {
-            for (WeightedString target : shortcutTargets) {
+            addNeutralWords(shortcutTargets);
                final CharGroup t = findWordInTree(mRoot, target.mWord);
                if (null == t) {
                    add(getCodePoints(target.mWord), 0, null, null);
                }
            }
        }
        if (null != bigrams) {
-            for (WeightedString bigram : bigrams) {
+            addNeutralWords(bigrams);
                final CharGroup t = findWordInTree(mRoot, bigram.mWord);
                if (null == t) {
                    add(getCodePoints(bigram.mWord), 0, null, null);
        }
-            }
+        add(getCodePoints(word), frequency, shortcutTargets, bigrams, false /* isShortcutOnly */);
        }
        add(getCodePoints(word), frequency, shortcutTargets, bigrams);
    }
    /**
@ -222,6 +230,22 @@ public class FusionDictionary implements Iterable<Word> {
        }
    }
    /**
     * Registers a shortcut whose source string should not itself be a dictionary word.
     *
     * Targets that are missing from the dictionary are first inserted as 0-frequency entries,
     * then the shortcut is added with its shortcut-only flag set.
     *
     * @param word the shortcut to add.
     * @param frequency the frequency of the shortcut, in the range [0..255].
     * @param shortcutTargets the list of targets for this shortcut. Must not be null.
     * @throws RuntimeException if shortcutTargets is null.
     */
    public void addShortcutOnly(final String word, final int frequency,
            final ArrayList<WeightedString> shortcutTargets) {
        if (shortcutTargets == null) {
            throw new RuntimeException("Can't add a shortcut without targets");
        }
        // Make sure every target exists before the shortcut that points at it.
        addNeutralWords(shortcutTargets);
        final int[] codePoints = getCodePoints(word);
        add(codePoints, frequency, shortcutTargets, null /* bigrams */,
                true /* isShortcutOnly */);
    }
    /**
     * Add a word to this dictionary.
     *
@ -232,10 +256,12 @@ public class FusionDictionary implements Iterable<Word> {
     * @param frequency the frequency of the word, in the range [0..255].
     * @param shortcutTargets an optional list of shortcut targets for this word (null if none).
     * @param bigrams an optional list of bigrams for this word (null if none).
     * @param isShortcutOnly whether this should be a shortcut only.
     */
    private void add(final int[] word, final int frequency,
            final ArrayList<WeightedString> shortcutTargets,
-            final ArrayList<WeightedString> bigrams) {
+            final ArrayList<WeightedString> bigrams,
            final boolean isShortcutOnly) {
        assert(frequency >= 0 && frequency <= 255);
        Node currentNode = mRoot;
        int charIndex = 0;
@ -260,7 +286,7 @@ public class FusionDictionary implements Iterable<Word> {
            final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]);
            final CharGroup newGroup = new CharGroup(
                    Arrays.copyOfRange(word, charIndex, word.length),
-                    shortcutTargets, bigrams, frequency, false /* isShortcutOnly */);
+                    shortcutTargets, bigrams, frequency, isShortcutOnly);
            currentNode.mData.add(insertionIndex, newGroup);
            checkStack(currentNode);
        } else {
@ -275,7 +301,7 @@ public class FusionDictionary implements Iterable<Word> {
                    } else {
                        final CharGroup newNode = new CharGroup(currentGroup.mChars,
                                shortcutTargets, bigrams, frequency, currentGroup.mChildren,
-                                false /* isShortcutOnly */);
+                                isShortcutOnly);
                        currentNode.mData.set(nodeIndex, newNode);
                        checkStack(currentNode);
                    }
@ -284,8 +310,7 @@ public class FusionDictionary implements Iterable<Word> {
                    // We only have to create a new node and add it to the end of this.
                    final CharGroup newNode = new CharGroup(
                            Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length),
-                                    shortcutTargets, bigrams, frequency,
+                                    shortcutTargets, bigrams, frequency, isShortcutOnly);
                                    false /* isShortcutOnly */);
                    currentGroup.mChildren = new Node();
                    currentGroup.mChildren.mData.add(newNode);
                }
@ -300,7 +325,8 @@ public class FusionDictionary implements Iterable<Word> {
                        }
                        final CharGroup newGroup = new CharGroup(word,
                                currentGroup.mShortcutTargets, currentGroup.mBigrams,
-                                frequency, currentGroup.mChildren, false /* isShortcutOnly */);
+                                frequency, currentGroup.mChildren,
                                currentGroup.mIsShortcutOnly && isShortcutOnly);
                        currentNode.mData.set(nodeIndex, newGroup);
                    }
                } else {
@ -318,16 +344,18 @@ public class FusionDictionary implements Iterable<Word> {
                    if (charIndex + differentCharIndex >= word.length) {
                        newParent = new CharGroup(
                                Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
-                                shortcutTargets, bigrams, frequency, newChildren,
+                                shortcutTargets, bigrams, frequency, newChildren, isShortcutOnly);
                                false /* isShortcutOnly */);
                    } else {
                        // isShortcutOnly makes no sense for non-terminal nodes. The following node
                        // is non-terminal (frequency -1 in FusionDictionary representation) so we
                        // pass false for isShortcutOnly
                        newParent = new CharGroup(
                                Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex),
                                null, null, -1, newChildren, false /* isShortcutOnly */);
                        final CharGroup newWord = new CharGroup(
                                Arrays.copyOfRange(word, charIndex + differentCharIndex,
                                        word.length), shortcutTargets, bigrams, frequency,
-                                        false /* isShortcutOnly */);
+                                        isShortcutOnly);
                        final int addIndex = word[charIndex + differentCharIndex]
                                > currentGroup.mChars[differentCharIndex] ? 1 : 0;
                        newChildren.mData.add(addIndex, newWord);
--- a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
+++ b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java
@ -46,6 +46,8 @@ public class XmlDictInputOutput {
    private static final String FREQUENCY_ATTR = "f";
    private static final String WORD_ATTR = "word";
    // Constant frequency given to shortcut-only entries, used until the shortcut file lists
    // a frequency of its own.
    private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1;
    /**
     * SAX handler for a unigram XML file.
     */
@ -232,6 +234,15 @@ public class XmlDictInputOutput {
                new UnigramHandler(dict, shortcutHandler.getShortcutMap(),
                        bigramHandler.getBigramMap());
        parser.parse(unigrams, unigramHandler);
        final HashMap<String, ArrayList<WeightedString>> shortcutMap =
                shortcutHandler.getShortcutMap();
        for (final String shortcut : shortcutMap.keySet()) {
            if (dict.hasWord(shortcut)) continue;
            // TODO: list a frequency in the shortcut file and use it here, instead of
            // a constant freq
            dict.addShortcutOnly(shortcut, SHORTCUT_ONLY_DEFAULT_FREQ, shortcutMap.get(shortcut));
        }
        return dict;
    }