Merge "Make makedict able to write binary format versions 1 and 2"

This commit is contained in:
Jean Chalard 2012-02-29 18:24:22 -08:00 committed by Android (Google) Code Review
commit 36aa8e39b5
2 changed files with 87 additions and 19 deletions

View file

@ -112,8 +112,10 @@ public class BinaryDictInputOutput {
*/ */
private static final int MAGIC_NUMBER = 0x78B1; private static final int MAGIC_NUMBER = 0x78B1;
private static final int VERSION = 1; private static final int MINIMUM_SUPPORTED_VERSION = 1;
private static final int MAXIMUM_SUPPORTED_VERSION = VERSION; private static final int MAXIMUM_SUPPORTED_VERSION = 2;
private static final int FIRST_VERSION_WITH_HEADER_SIZE = 2;
// No options yet, reserved for future use. // No options yet, reserved for future use.
private static final int OPTIONS = 0; private static final int OPTIONS = 0;
@ -797,9 +799,10 @@ public class BinaryDictInputOutput {
* *
* @param destination the stream to write the binary data to. * @param destination the stream to write the binary data to.
* @param dict the dictionary to write. * @param dict the dictionary to write.
* @param version the version of the format to write, currently either 1 or 2.
*/ */
public static void writeDictionaryBinary(OutputStream destination, FusionDictionary dict) public static void writeDictionaryBinary(OutputStream destination, FusionDictionary dict,
throws IOException { final int version) throws IOException, UnsupportedFormatException {
// Addresses are limited to 3 bytes, so we'll just make a 16MB buffer. Since addresses // Addresses are limited to 3 bytes, so we'll just make a 16MB buffer. Since addresses
// can be relative to each node, the structure itself is not limited to 16MB at all, but // can be relative to each node, the structure itself is not limited to 16MB at all, but
@ -811,16 +814,30 @@ public class BinaryDictInputOutput {
final byte[] buffer = new byte[1 << 24]; final byte[] buffer = new byte[1 << 24];
int index = 0; int index = 0;
if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
throw new UnsupportedFormatException("Requested file format version " + version
+ ", but this implementation only supports versions "
+ MINIMUM_SUPPORTED_VERSION + " through " + MAXIMUM_SUPPORTED_VERSION);
}
// Magic number in big-endian order. // Magic number in big-endian order.
buffer[index++] = (byte) (0xFF & (MAGIC_NUMBER >> 8)); buffer[index++] = (byte) (0xFF & (MAGIC_NUMBER >> 8));
buffer[index++] = (byte) (0xFF & MAGIC_NUMBER); buffer[index++] = (byte) (0xFF & MAGIC_NUMBER);
// Dictionary version. // Dictionary version.
buffer[index++] = (byte) (0xFF & VERSION); buffer[index++] = (byte) (0xFF & version);
// Options flags // Options flags
buffer[index++] = (byte) (0xFF & (OPTIONS >> 8)); buffer[index++] = (byte) (0xFF & (OPTIONS >> 8));
buffer[index++] = (byte) (0xFF & OPTIONS); buffer[index++] = (byte) (0xFF & OPTIONS);
if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
final int headerSizeOffset = index;
index += 3; // Size of the header size
// Should we include the locale and title of the dictionary ? // Write out the header contents here.
buffer[headerSizeOffset] = (byte) (0xFF & (index >> 16));
buffer[headerSizeOffset + 1] = (byte) (0xFF & (index >> 8));
buffer[headerSizeOffset + 2] = (byte) (0xFF & (index >> 0));
}
destination.write(buffer, 0, index); destination.write(buffer, 0, index);
index = 0; index = 0;
@ -1125,7 +1142,16 @@ public class BinaryDictInputOutput {
// Read options // Read options
source.readUnsignedShort(); source.readUnsignedShort();
long headerSize = source.getFilePointer(); final long headerSize;
if (version < FIRST_VERSION_WITH_HEADER_SIZE) {
headerSize = source.getFilePointer();
} else {
headerSize = source.readUnsignedByte() << 16 + source.readUnsignedByte() << 8
+ source.readUnsignedByte();
// read the header body
source.seek(headerSize);
}
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>(); Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>(); Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
final Node root = readNode(source, headerSize, reverseNodeMapping, reverseGroupMapping); final Node root = readNode(source, headerSize, reverseNodeMapping, reverseGroupMapping);

View file

@ -41,6 +41,7 @@ public class DictionaryMaker {
private final static String OPTION_INPUT_BIGRAM_XML = "-b"; private final static String OPTION_INPUT_BIGRAM_XML = "-b";
private final static String OPTION_INPUT_SHORTCUT_XML = "-c"; private final static String OPTION_INPUT_SHORTCUT_XML = "-c";
private final static String OPTION_OUTPUT_BINARY = "-d"; private final static String OPTION_OUTPUT_BINARY = "-d";
private final static String OPTION_OUTPUT_BINARY_FORMAT_VERSION_1 = "-d1";
private final static String OPTION_OUTPUT_XML = "-x"; private final static String OPTION_OUTPUT_XML = "-x";
private final static String OPTION_HELP = "-h"; private final static String OPTION_HELP = "-h";
public final String mInputBinary; public final String mInputBinary;
@ -48,11 +49,27 @@ public class DictionaryMaker {
public final String mInputShortcutXml; public final String mInputShortcutXml;
public final String mInputBigramXml; public final String mInputBigramXml;
public final String mOutputBinary; public final String mOutputBinary;
public final String mOutputBinaryFormat1;
public final String mOutputXml; public final String mOutputXml;
private void checkIntegrity() { private void checkIntegrity() throws IOException {
checkHasExactlyOneInput(); checkHasExactlyOneInput();
checkHasAtLeastOneOutput(); checkHasAtLeastOneOutput();
checkNotSameFile(mInputBinary, mOutputBinary);
checkNotSameFile(mInputBinary, mOutputBinaryFormat1);
checkNotSameFile(mInputBinary, mOutputXml);
checkNotSameFile(mInputUnigramXml, mOutputBinary);
checkNotSameFile(mInputUnigramXml, mOutputBinaryFormat1);
checkNotSameFile(mInputUnigramXml, mOutputXml);
checkNotSameFile(mInputShortcutXml, mOutputBinary);
checkNotSameFile(mInputShortcutXml, mOutputBinaryFormat1);
checkNotSameFile(mInputShortcutXml, mOutputXml);
checkNotSameFile(mInputBigramXml, mOutputBinary);
checkNotSameFile(mInputBigramXml, mOutputBinaryFormat1);
checkNotSameFile(mInputBigramXml, mOutputXml);
checkNotSameFile(mOutputBinary, mOutputBinaryFormat1);
checkNotSameFile(mOutputBinary, mOutputXml);
checkNotSameFile(mOutputBinaryFormat1, mOutputXml);
} }
private void checkHasExactlyOneInput() { private void checkHasExactlyOneInput() {
@ -67,26 +84,40 @@ public class DictionaryMaker {
} }
private void checkHasAtLeastOneOutput() { private void checkHasAtLeastOneOutput() {
if (null == mOutputBinary && null == mOutputXml) { if (null == mOutputBinary && null == mOutputBinaryFormat1 && null == mOutputXml) {
throw new RuntimeException("No output specified"); throw new RuntimeException("No output specified");
} }
} }
/**
 * Guards against one file being used in two roles at once.
 *
 * Both paths are resolved to their canonical form before being compared. If either
 * path is null there is nothing to compare and the method returns silently.
 *
 * @param path1 the first path, or null if that role is unused.
 * @param path2 the second path, or null if that role is unused.
 * @throws IOException if a canonical path cannot be resolved.
 * @throws RuntimeException if both paths resolve to the same canonical path.
 */
private static void checkNotSameFile(final String path1, final String path2)
        throws IOException {
    if (null != path1 && null != path2) {
        final String canonicalPath1 = new File(path1).getCanonicalPath();
        final String canonicalPath2 = new File(path2).getCanonicalPath();
        if (canonicalPath1.equals(canonicalPath2)) {
            throw new RuntimeException(path1 + " and " + path2 + " are the same file: "
                    + " refusing to process.");
        }
    }
}
private void displayHelp() { private void displayHelp() {
MakedictLog.i("Usage: makedict " MakedictLog.i("Usage: makedict "
+ "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts.xml>] " + "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts.xml>] "
+ "| -s <binary input>] " + "| -s <binary input>] [-d <binary output format version 2>] "
+ "[-d <binary output>] [-x <xml output>] [-2]\n" + "[-d1 <binary output format version 1>] [-x <xml output>] [-2]\n"
+ "\n" + "\n"
+ " Converts a source dictionary file to one or several outputs.\n" + " Converts a source dictionary file to one or several outputs.\n"
+ " Source can be an XML file, with an optional XML bigrams file, or a\n" + " Source can be an XML file, with an optional XML bigrams file, or a\n"
+ " binary dictionary file.\n" + " binary dictionary file.\n"
+ " Both binary and XML outputs are supported. Both can be output at\n" + " Binary version 1 (Ice Cream Sandwich), 2 (Jelly Bean) and XML outputs\n"
+ " the same time but outputting several files of the same type is not\n" + " are supported. All three can be output at the same time, but the same\n"
+ " supported."); + " output format cannot be specified several times. The behavior is\n"
+ " unspecified if the same file is specified for input and output, or for\n"
+ " several outputs.");
} }
public Arguments(String[] argsArray) { public Arguments(String[] argsArray) throws IOException {
final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray)); final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray));
if (args.isEmpty()) { if (args.isEmpty()) {
displayHelp(); displayHelp();
@ -96,6 +127,7 @@ public class DictionaryMaker {
String inputShortcutXml = null; String inputShortcutXml = null;
String inputBigramXml = null; String inputBigramXml = null;
String outputBinary = null; String outputBinary = null;
String outputBinaryFormat1 = null;
String outputXml = null; String outputXml = null;
while (!args.isEmpty()) { while (!args.isEmpty()) {
@ -126,6 +158,8 @@ public class DictionaryMaker {
inputBigramXml = filename; inputBigramXml = filename;
} else if (OPTION_OUTPUT_BINARY.equals(arg)) { } else if (OPTION_OUTPUT_BINARY.equals(arg)) {
outputBinary = filename; outputBinary = filename;
} else if (OPTION_OUTPUT_BINARY_FORMAT_VERSION_1.equals(arg)) {
outputBinaryFormat1 = filename;
} else if (OPTION_OUTPUT_XML.equals(arg)) { } else if (OPTION_OUTPUT_XML.equals(arg)) {
outputXml = filename; outputXml = filename;
} else { } else {
@ -152,6 +186,7 @@ public class DictionaryMaker {
mInputShortcutXml = inputShortcutXml; mInputShortcutXml = inputShortcutXml;
mInputBigramXml = inputBigramXml; mInputBigramXml = inputBigramXml;
mOutputBinary = outputBinary; mOutputBinary = outputBinary;
mOutputBinaryFormat1 = outputBinaryFormat1;
mOutputXml = outputXml; mOutputXml = outputXml;
checkIntegrity(); checkIntegrity();
} }
@ -231,9 +266,13 @@ public class DictionaryMaker {
* @throws IOException if one of the output files can't be written to. * @throws IOException if one of the output files can't be written to.
*/ */
private static void writeOutputToParsedArgs(final Arguments args, final FusionDictionary dict) private static void writeOutputToParsedArgs(final Arguments args, final FusionDictionary dict)
throws FileNotFoundException, IOException { throws FileNotFoundException, IOException, UnsupportedFormatException,
IllegalArgumentException {
if (null != args.mOutputBinary) { if (null != args.mOutputBinary) {
writeBinaryDictionary(args.mOutputBinary, dict); writeBinaryDictionary(args.mOutputBinary, dict, 2);
}
if (null != args.mOutputBinaryFormat1) {
writeBinaryDictionary(args.mOutputBinaryFormat1, dict, 1);
} }
if (null != args.mOutputXml) { if (null != args.mOutputXml) {
writeXmlDictionary(args.mOutputXml, dict); writeXmlDictionary(args.mOutputXml, dict);
@ -245,13 +284,16 @@ public class DictionaryMaker {
* *
* @param outputFilename the name of the file to write to. * @param outputFilename the name of the file to write to.
* @param dict the dictionary to write. * @param dict the dictionary to write.
* @param version the binary format version to use.
* @throws FileNotFoundException if the output file can't be created. * @throws FileNotFoundException if the output file can't be created.
* @throws IOException if the output file can't be written to. * @throws IOException if the output file can't be written to.
*/ */
private static void writeBinaryDictionary(final String outputFilename, private static void writeBinaryDictionary(final String outputFilename,
final FusionDictionary dict) throws FileNotFoundException, IOException { final FusionDictionary dict, final int version)
throws FileNotFoundException, IOException, UnsupportedFormatException {
final File outputFile = new File(outputFilename); final File outputFile = new File(outputFilename);
BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict); BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict,
version);
} }
/** /**