From c599f2e9d6ab839f38183aa178684ff0e94178a3 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Mon, 16 Jan 2012 12:51:46 +0900 Subject: [PATCH] Actually add shortcut-only entries. Change-Id: I84bec8fb560cec2ad9cc857397a3f77a96b1d12d --- .../latin/BinaryDictInputOutput.java | 4 +- .../inputmethod/latin/FusionDictionary.java | 72 +++++++++++++------ .../inputmethod/latin/XmlDictInputOutput.java | 11 +++ 3 files changed, 64 insertions(+), 23 deletions(-) diff --git a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java index 35e8c4818..7aadc677b 100644 --- a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java +++ b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java @@ -606,7 +606,9 @@ public class BinaryDictInputOutput { } flags |= FLAG_HAS_BIGRAMS; } - // TODO: fill in the FLAG_IS_SHORTCUT_ONLY + if (group.mIsShortcutOnly) { + flags |= FLAG_IS_SHORTCUT_ONLY; + } return flags; } diff --git a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java index 3ab206d80..b47b025a9 100644 --- a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java +++ b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java @@ -170,6 +170,24 @@ public class FusionDictionary implements Iterable { return array; } + /** + * Helper method to add all words in a list as 0-frequency entries + * + * These words are added when shortcuts targets or bigrams are not found in the dictionary + * yet. The same words may be added later with an actual frequency - this is handled by + * the private version of add(). + */ + private void addNeutralWords(final ArrayList words) { + if (null != words) { + for (WeightedString word : words) { + final CharGroup t = findWordInTree(mRoot, word.mWord); + if (null == t) { + add(getCodePoints(word.mWord), 0, null, null, false /* isShortcutOnly */); + } + } + } + } + /** * Helper method to add a word as a string. * @@ -186,22 +204,12 @@ public class FusionDictionary implements Iterable { final ArrayList shortcutTargets, final ArrayList bigrams) { if (null != shortcutTargets) { - for (WeightedString target : shortcutTargets) { - final CharGroup t = findWordInTree(mRoot, target.mWord); - if (null == t) { - add(getCodePoints(target.mWord), 0, null, null); - } - } + addNeutralWords(shortcutTargets); } if (null != bigrams) { - for (WeightedString bigram : bigrams) { - final CharGroup t = findWordInTree(mRoot, bigram.mWord); - if (null == t) { - add(getCodePoints(bigram.mWord), 0, null, null); - } - } + addNeutralWords(bigrams); } - add(getCodePoints(word), frequency, shortcutTargets, bigrams); + add(getCodePoints(word), frequency, shortcutTargets, bigrams, false /* isShortcutOnly */); } /** @@ -222,6 +230,22 @@ public class FusionDictionary implements Iterable { } } + /** + * Helper method to add a shortcut that should not be a dictionary word. + * + * @param word the word to add. + * @param frequency the frequency of the word, in the range [0..255]. + * @param shortcutTargets a list of shortcut targets. May not be null. + */ + public void addShortcutOnly(final String word, final int frequency, + final ArrayList shortcutTargets) { + if (null == shortcutTargets) { + throw new RuntimeException("Can't add a shortcut without targets"); + } + addNeutralWords(shortcutTargets); + add(getCodePoints(word), frequency, shortcutTargets, null, true /* isShortcutOnly */); + } + /** * Add a word to this dictionary. * @@ -232,10 +256,12 @@ public class FusionDictionary implements Iterable { * @param frequency the frequency of the word, in the range [0..255]. * @param shortcutTargets an optional list of shortcut targets for this word (null if none). * @param bigrams an optional list of bigrams for this word (null if none). + * @param isShortcutOnly whether this should be a shortcut only. */ private void add(final int[] word, final int frequency, final ArrayList shortcutTargets, - final ArrayList bigrams) { + final ArrayList bigrams, + final boolean isShortcutOnly) { assert(frequency >= 0 && frequency <= 255); Node currentNode = mRoot; int charIndex = 0; @@ -260,7 +286,7 @@ public class FusionDictionary implements Iterable { final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]); final CharGroup newGroup = new CharGroup( Arrays.copyOfRange(word, charIndex, word.length), - shortcutTargets, bigrams, frequency, false /* isShortcutOnly */); + shortcutTargets, bigrams, frequency, isShortcutOnly); currentNode.mData.add(insertionIndex, newGroup); checkStack(currentNode); } else { @@ -275,7 +301,7 @@ public class FusionDictionary implements Iterable { } else { final CharGroup newNode = new CharGroup(currentGroup.mChars, shortcutTargets, bigrams, frequency, currentGroup.mChildren, - false /* isShortcutOnly */); + isShortcutOnly); currentNode.mData.set(nodeIndex, newNode); checkStack(currentNode); } @@ -284,8 +310,7 @@ public class FusionDictionary implements Iterable { // We only have to create a new node and add it to the end of this. final CharGroup newNode = new CharGroup( Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), - shortcutTargets, bigrams, frequency, - false /* isShortcutOnly */); + shortcutTargets, bigrams, frequency, isShortcutOnly); currentGroup.mChildren = new Node(); currentGroup.mChildren.mData.add(newNode); } @@ -300,7 +325,8 @@ public class FusionDictionary implements Iterable { } final CharGroup newGroup = new CharGroup(word, currentGroup.mShortcutTargets, currentGroup.mBigrams, - frequency, currentGroup.mChildren, false /* isShortcutOnly */); + frequency, currentGroup.mChildren, + currentGroup.mIsShortcutOnly && isShortcutOnly); currentNode.mData.set(nodeIndex, newGroup); } } else { @@ -318,16 +344,18 @@ public class FusionDictionary implements Iterable { if (charIndex + differentCharIndex >= word.length) { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), - shortcutTargets, bigrams, frequency, newChildren, - false /* isShortcutOnly */); + shortcutTargets, bigrams, frequency, newChildren, isShortcutOnly); } else { + // isShortcutOnly makes no sense for non-terminal nodes. The following node + // is non-terminal (frequency 0 in FusionDictionary representation) so we + // pass false for isShortcutOnly newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), null, null, -1, newChildren, false /* isShortcutOnly */); final CharGroup newWord = new CharGroup( Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), shortcutTargets, bigrams, frequency, - false /* isShortcutOnly */); + isShortcutOnly); final int addIndex = word[charIndex + differentCharIndex] > currentGroup.mChars[differentCharIndex] ? 1 : 0; newChildren.mData.add(addIndex, newWord); diff --git a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java index d6c03ed70..58e7f2935 100644 --- a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java +++ b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java @@ -46,6 +46,8 @@ public class XmlDictInputOutput { private static final String FREQUENCY_ATTR = "f"; private static final String WORD_ATTR = "word"; + private static final int SHORTCUT_ONLY_DEFAULT_FREQ = 1; + /** * SAX handler for a unigram XML file. */ @@ -232,6 +234,15 @@ public class XmlDictInputOutput { new UnigramHandler(dict, shortcutHandler.getShortcutMap(), bigramHandler.getBigramMap()); parser.parse(unigrams, unigramHandler); + + final HashMap> shortcutMap = + shortcutHandler.getShortcutMap(); + for (final String shortcut : shortcutMap.keySet()) { + if (dict.hasWord(shortcut)) continue; + // TODO: list a frequency in the shortcut file and use it here, instead of + // a constant freq + dict.addShortcutOnly(shortcut, SHORTCUT_ONLY_DEFAULT_FREQ, shortcutMap.get(shortcut)); + } return dict; }