From 342d5d5dd095a5f73413a630b5de9db334ca45e4 Mon Sep 17 00:00:00 2001 From: Jean Chalard Date: Mon, 26 Dec 2011 19:33:37 +0900 Subject: [PATCH] Wire the Xml-read shortcuts into the dict creation code (B6) Change-Id: I352064835abb62c294b48e080d9709ff013c7bb0 --- .../inputmethod/latin/DictionaryMaker.java | 22 ++++++++++++++----- .../inputmethod/latin/XmlDictInputOutput.java | 21 +++++++++++++----- 2 files changed, 31 insertions(+), 12 deletions(-) diff --git a/tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java b/tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java index afd602308..2fcd5750a 100644 --- a/tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java +++ b/tools/makedict/src/com/android/inputmethod/latin/DictionaryMaker.java @@ -39,11 +39,13 @@ public class DictionaryMaker { private final static String OPTION_VERSION_2 = "-2"; private final static String OPTION_INPUT_SOURCE = "-s"; private final static String OPTION_INPUT_BIGRAM_XML = "-b"; + private final static String OPTION_INPUT_SHORTCUT_XML = "-c"; private final static String OPTION_OUTPUT_BINARY = "-d"; private final static String OPTION_OUTPUT_XML = "-x"; private final static String OPTION_HELP = "-h"; public final String mInputBinary; public final String mInputUnigramXml; + public final String mInputShortcutXml; public final String mInputBigramXml; public final String mOutputBinary; public final String mOutputXml; @@ -72,8 +74,9 @@ public class DictionaryMaker { private void displayHelp() { MakedictLog.i("Usage: makedict " - + "[-s [-b ] | -s ] " - + " [-d ] [-x ] [-2]\n" + + "[-s [-b ] [-c ] " + + "| -s ] " + + "[-d ] [-x ] [-2]\n" + "\n" + " Converts a source dictionary file to one or several outputs.\n" + " Source can be an XML file, with an optional XML bigrams file, or a\n" @@ -90,6 +93,7 @@ public class DictionaryMaker { } String inputBinary = null; String inputUnigramXml = null; + String inputShortcutXml = null; String inputBigramXml = null; String outputBinary = null; String outputXml = null; @@ -116,6 +120,8 @@ public class DictionaryMaker { } else { inputUnigramXml = filename; } + } else if (OPTION_INPUT_SHORTCUT_XML.equals(arg)) { + inputShortcutXml = filename; } else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) { inputBigramXml = filename; } else if (OPTION_OUTPUT_BINARY.equals(arg)) { @@ -143,6 +149,7 @@ public class DictionaryMaker { mInputBinary = inputBinary; mInputUnigramXml = inputUnigramXml; + mInputShortcutXml = inputShortcutXml; mInputBigramXml = inputBigramXml; mOutputBinary = outputBinary; mOutputXml = outputXml; @@ -170,7 +177,7 @@ public class DictionaryMaker { if (null != args.mInputBinary) { return readBinaryFile(args.mInputBinary); } else if (null != args.mInputUnigramXml) { - return readXmlFile(args.mInputUnigramXml, args.mInputBigramXml); + return readXmlFile(args.mInputUnigramXml, args.mInputShortcutXml, args.mInputBigramXml); } else { throw new RuntimeException("No input file specified"); } @@ -195,6 +202,7 @@ public class DictionaryMaker { * Read a dictionary from a unigram XML file, and optionally a bigram XML file. * * @param unigramXmlFilename the name of the unigram XML file. May not be null. + * @param shortcutXmlFilename the name of the shortcut XML file, or null if there is none. * @param bigramXmlFilename the name of the bigram XML file. Pass null if there are no bigrams. * @return the read dictionary. * @throws FileNotFoundException if one of the files can't be found @@ -203,12 +211,14 @@ public class DictionaryMaker { * @throws ParserConfigurationException if the system can't create a SAX parser */ private static FusionDictionary readXmlFile(final String unigramXmlFilename, - final String bigramXmlFilename) throws FileNotFoundException, SAXException, - IOException, ParserConfigurationException { + final String shortcutXmlFilename, final String bigramXmlFilename) + throws FileNotFoundException, SAXException, IOException, ParserConfigurationException { final FileInputStream unigrams = new FileInputStream(new File(unigramXmlFilename)); + final FileInputStream shortcuts = null == shortcutXmlFilename ? null : + new FileInputStream(new File(shortcutXmlFilename)); final FileInputStream bigrams = null == bigramXmlFilename ? null : new FileInputStream(new File(bigramXmlFilename)); - return XmlDictInputOutput.readDictionaryXml(unigrams, bigrams); + return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams); } /** diff --git a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java index 19ed9d8d2..1562b693b 100644 --- a/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java +++ b/tools/makedict/src/com/android/inputmethod/latin/XmlDictInputOutput.java @@ -61,6 +61,7 @@ public class XmlDictInputOutput { int mState; // the state of the parser int mFreq; // the currently read freq String mWord; // the current word + final HashMap> mShortcutsMap; final HashMap> mBigramsMap; /** @@ -69,9 +70,11 @@ public class XmlDictInputOutput { * @param dict the dictionary to construct. * @param bigrams the bigrams as a map. This may be empty, but may not be null. */ - public UnigramHandler(FusionDictionary dict, - HashMap> bigrams) { + public UnigramHandler(final FusionDictionary dict, + final HashMap> shortcuts, + final HashMap> bigrams) { mDictionary = dict; + mShortcutsMap = shortcuts; mBigramsMap = bigrams; mWord = ""; mState = START; @@ -107,8 +110,7 @@ public class XmlDictInputOutput { @Override public void endElement(String uri, String localName, String qName) { if (WORD == mState) { - // TODO: pass the shortcut targets - mDictionary.add(mWord, mFreq, null, mBigramsMap.get(mWord)); + mDictionary.add(mWord, mFreq, mShortcutsMap.get(mWord), mBigramsMap.get(mWord)); mState = START; } } @@ -208,9 +210,12 @@ public class XmlDictInputOutput { * representation. * * @param unigrams the file to read the data from. + * @param shortcuts the file to read the shortcuts from, or null. + * @param bigrams the file to read the bigrams from, or null. * @return the in-memory representation of the dictionary. */ - public static FusionDictionary readDictionaryXml(InputStream unigrams, InputStream bigrams) + public static FusionDictionary readDictionaryXml(final InputStream unigrams, + final InputStream shortcuts, final InputStream bigrams) throws SAXException, IOException, ParserConfigurationException { final SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setNamespaceAware(true); @@ -218,9 +223,13 @@ public class XmlDictInputOutput { final BigramHandler bigramHandler = new BigramHandler(); if (null != bigrams) parser.parse(bigrams, bigramHandler); + final ShortcutHandler shortcutHandler = new ShortcutHandler(); + if (null != shortcuts) parser.parse(shortcuts, shortcutHandler); + final FusionDictionary dict = new FusionDictionary(); final UnigramHandler unigramHandler = - new UnigramHandler(dict, bigramHandler.getBigramMap()); + new UnigramHandler(dict, shortcutHandler.getShortcutMap(), + bigramHandler.getBigramMap()); parser.parse(unigrams, unigramHandler); return dict; }