diff --git a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java index 92f402d3e..b1d9cc02d 100644 --- a/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java +++ b/tools/makedict/src/com/android/inputmethod/latin/BinaryDictInputOutput.java @@ -44,8 +44,9 @@ public class BinaryDictInputOutput { * a | 11 = 3 bytes : FLAG_GROUP_ADDRESS_TYPE_THREEBYTES * g | has several chars ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_MULTIPLE_CHARS * s | has a terminal ? 1 bit, 1 = yes, 0 = no : FLAG_IS_TERMINAL - * | reserved 1 bit, 1 = yes, 0 = no + * | has shortcut targets ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_SHORTCUT_TARGETS * | has bigrams ? 1 bit, 1 = yes, 0 = no : FLAG_HAS_BIGRAMS + * | is shortcut only ? 1 bit, 1 = yes, 0 = no : FLAG_IS_SHORTCUT_ONLY * * c | IF FLAG_HAS_MULTIPLE_CHARS * h | char, char, char, char n * (1 or 3 bytes) : use CharGroupInfo for i/o helpers @@ -71,6 +72,8 @@ public class BinaryDictInputOutput { * d * dress * + * | IF FLAG_IS_TERMINAL && FLAG_HAS_SHORTCUT_TARGETS + * | shortcut targets address list * | IF FLAG_IS_TERMINAL && FLAG_HAS_BIGRAMS * | bigrams address list * @@ -126,7 +129,9 @@ public class BinaryDictInputOutput { private static final int FLAG_HAS_MULTIPLE_CHARS = 0x20; private static final int FLAG_IS_TERMINAL = 0x10; + private static final int FLAG_HAS_SHORTCUT_TARGETS = 0x08; private static final int FLAG_HAS_BIGRAMS = 0x04; + private static final int FLAG_IS_SHORTCUT_ONLY = 0x02; private static final int FLAG_ATTRIBUTE_HAS_NEXT = 0x80; private static final int FLAG_ATTRIBUTE_OFFSET_NEGATIVE = 0x40; @@ -942,11 +947,13 @@ public class BinaryDictInputOutput { source.seek(currentPosition); } nodeContents.add( - new CharGroup(info.mCharacters, bigrams, info.mFrequency, + // TODO: read and pass the shortcut targets + new CharGroup(info.mCharacters, null, bigrams, info.mFrequency, children)); } else { + // TODO: read and pass the shortcut targets nodeContents.add( - new CharGroup(info.mCharacters, bigrams, info.mFrequency)); + new CharGroup(info.mCharacters, null, bigrams, info.mFrequency)); } groupOffset = info.mEndAddress; } @@ -996,7 +1003,8 @@ public class BinaryDictInputOutput { new FusionDictionary.DictionaryOptions()); if (null != dict) { for (Word w : dict) { - newDict.add(w.mWord, w.mFrequency, w.mBigrams); + // TODO: pass the shortcut targets + newDict.add(w.mWord, w.mFrequency, null, w.mBigrams); } } diff --git a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java index f6220eea2..2f6b2c371 100644 --- a/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java +++ b/tools/makedict/src/com/android/inputmethod/latin/FusionDictionary.java @@ -68,7 +68,7 @@ public class FusionDictionary implements Iterable { } /** - * A group of characters, with a frequency, shortcuts, bigrams, and children. + * A group of characters, with a frequency, shortcut targets, bigrams, and children. * * This is the central class of the in-memory representation. A CharGroup is what can * be seen as a traditional "trie node", except it can hold several characters at the @@ -82,6 +82,7 @@ public class FusionDictionary implements Iterable { public static class CharGroup { public static final int NOT_A_TERMINAL = -1; final int mChars[]; + final ArrayList mShortcutTargets; final ArrayList mBigrams; final int mFrequency; // NOT_A_TERMINAL == mFrequency indicates this is not a terminal. Node mChildren; @@ -89,18 +90,20 @@ public class FusionDictionary implements Iterable { int mCachedSize; int mCachedAddress; - public CharGroup(final int[] chars, + public CharGroup(final int[] chars, final ArrayList shortcutTargets, final ArrayList bigrams, final int frequency) { mChars = chars; mFrequency = frequency; + mShortcutTargets = shortcutTargets; mBigrams = bigrams; mChildren = null; } - public CharGroup(final int[] chars, + public CharGroup(final int[] chars, final ArrayList shortcutTargets, final ArrayList bigrams, final int frequency, final Node children) { mChars = chars; mFrequency = frequency; + mShortcutTargets = shortcutTargets; mBigrams = bigrams; mChildren = children; } @@ -165,18 +168,29 @@ public class FusionDictionary implements Iterable { * * @param word the word to add. * @param frequency the frequency of the word, in the range [0..255]. + * @param shortcutTargets a list of shortcut targets for this word, or null. * @param bigrams a list of bigrams, or null. */ - public void add(String word, int frequency, ArrayList bigrams) { + public void add(final String word, final int frequency, + final ArrayList shortcutTargets, + final ArrayList bigrams) { + if (null != shortcutTargets) { + for (WeightedString target : shortcutTargets) { + final CharGroup t = findWordInTree(mRoot, target.mWord); + if (null == t) { + add(getCodePoints(target.mWord), 0, null, null); + } + } + } if (null != bigrams) { for (WeightedString bigram : bigrams) { final CharGroup t = findWordInTree(mRoot, bigram.mWord); if (null == t) { - add(getCodePoints(bigram.mWord), 0, null); + add(getCodePoints(bigram.mWord), 0, null, null); } } } - add(getCodePoints(word), frequency, bigrams); + add(getCodePoints(word), frequency, shortcutTargets, bigrams); } /** @@ -200,14 +214,17 @@ public class FusionDictionary implements Iterable { /** * Add a word to this dictionary. * - * The bigrams, if any, have to be in the dictionary already. If they aren't, + * The shortcuts and bigrams, if any, have to be in the dictionary already. If they aren't, * an exception is thrown. * * @param word the word, as an int array. * @param frequency the frequency of the word, in the range [0..255]. + * @param shortcutTargets an optional list of shortcut targets for this word (null if none). * @param bigrams an optional list of bigrams for this word (null if none). */ - private void add(int[] word, int frequency, ArrayList bigrams) { + private void add(final int[] word, final int frequency, + final ArrayList shortcutTargets, + final ArrayList bigrams) { assert(frequency >= 0 && frequency <= 255); Node currentNode = mRoot; int charIndex = 0; @@ -231,7 +248,8 @@ public class FusionDictionary implements Iterable { // No node at this point to accept the word. Create one. final int insertionIndex = findInsertionIndex(currentNode, word[charIndex]); final CharGroup newGroup = new CharGroup( - Arrays.copyOfRange(word, charIndex, word.length), bigrams, frequency); + Arrays.copyOfRange(word, charIndex, word.length), + shortcutTargets, bigrams, frequency); currentNode.mData.add(insertionIndex, newGroup); checkStack(currentNode); } else { @@ -245,7 +263,7 @@ public class FusionDictionary implements Iterable { + new String(word, 0, word.length)); } else { final CharGroup newNode = new CharGroup(currentGroup.mChars, - bigrams, frequency, currentGroup.mChildren); + shortcutTargets, bigrams, frequency, currentGroup.mChildren); currentNode.mData.set(nodeIndex, newNode); checkStack(currentNode); } @@ -254,7 +272,7 @@ public class FusionDictionary implements Iterable { // We only have to create a new node and add it to the end of this. final CharGroup newNode = new CharGroup( Arrays.copyOfRange(word, charIndex + differentCharIndex, word.length), - bigrams, frequency); + shortcutTargets, bigrams, frequency); currentGroup.mChildren = new Node(); currentGroup.mChildren.mData.add(newNode); } @@ -268,7 +286,8 @@ public class FusionDictionary implements Iterable { + new String(word, 0, word.length)); } final CharGroup newGroup = new CharGroup(word, - currentGroup.mBigrams, frequency, currentGroup.mChildren); + currentGroup.mShortcutTargets, currentGroup.mBigrams, + frequency, currentGroup.mChildren); currentNode.mData.set(nodeIndex, newGroup); } } else { @@ -277,7 +296,7 @@ public class FusionDictionary implements Iterable { Node newChildren = new Node(); final CharGroup newOldWord = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, differentCharIndex, - currentGroup.mChars.length), + currentGroup.mChars.length), currentGroup.mShortcutTargets, currentGroup.mBigrams, currentGroup.mFrequency, currentGroup.mChildren); newChildren.mData.add(newOldWord); @@ -285,14 +304,14 @@ public class FusionDictionary implements Iterable { if (charIndex + differentCharIndex >= word.length) { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), - bigrams, frequency, newChildren); + shortcutTargets, bigrams, frequency, newChildren); } else { newParent = new CharGroup( Arrays.copyOfRange(currentGroup.mChars, 0, differentCharIndex), - null, -1, newChildren); + null, null, -1, newChildren); final CharGroup newWord = new CharGroup( Arrays.copyOfRange(word, charIndex + differentCharIndex, - word.length), bigrams, frequency); + word.length), shortcutTargets, bigrams, frequency); final int addIndex = word[charIndex + differentCharIndex] > currentGroup.mChars[differentCharIndex] ? 1 : 0; newChildren.mData.add(addIndex, newWord); @@ -355,7 +374,7 @@ public class FusionDictionary implements Iterable { */ private static int findInsertionIndex(final Node node, int character) { final List data = node.mData; - final CharGroup reference = new CharGroup(new int[] { character }, null, 0); + final CharGroup reference = new CharGroup(new int[] { character }, null, null, 0); int result = Collections.binarySearch(data, reference, CHARGROUP_COMPARATOR); return result >= 0 ? result : -result - 1; } @@ -573,6 +592,7 @@ public class FusionDictionary implements Iterable { } if (currentGroup.mFrequency >= 0) return new Word(mCurrentString.toString(), currentGroup.mFrequency, + // TODO: pass the shortcut targets here currentGroup.mBigrams); } else { mPositions.removeLast(); diff --git a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java index 4720e9d10..19ed9d8d2 100644 --- a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java +++ b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java @@ -107,7 +107,8 @@ public class XmlDictInputOutput { @Override public void endElement(String uri, String localName, String qName) { if (WORD == mState) { - mDictionary.add(mWord, mFreq, mBigramsMap.get(mWord)); + // TODO: pass the shortcut targets + mDictionary.add(mWord, mFreq, null, mBigramsMap.get(mWord)); mState = START; } } diff --git a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java index 79cf14b2b..6ac046bbf 100644 --- a/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java +++ b/tools/makedict/tests/com/android/inputmethod/latin/BinaryDictInputOutputTest.java @@ -39,11 +39,11 @@ public class BinaryDictInputOutputTest extends TestCase { // that it does not contain any duplicates. public void testFlattenNodes() { final FusionDictionary dict = new FusionDictionary(); - dict.add("foo", 1, null); - dict.add("fta", 1, null); - dict.add("ftb", 1, null); - dict.add("bar", 1, null); - dict.add("fool", 1, null); + dict.add("foo", 1, null, null); + dict.add("fta", 1, null, null); + dict.add("ftb", 1, null, null); + dict.add("bar", 1, null, null); + dict.add("fool", 1, null, null); final ArrayList result = BinaryDictInputOutput.flattenTree(dict.mRoot); assertEquals(4, result.size()); while (!result.isEmpty()) {