Make makedict able to write binary format versions 1 and 2

Version 2 provides for a variable-length header region.
The first step is to allow the implementation to simply step
over the header, which will let us store any metadata we want
in it, since that metadata is simply ignored. This change
implements the makedict part of this.

Bug: 5686638
Change-Id: I97f1a8a4f84d15e1bedbad74e54303852aef6e0b
main
Jean Chalard 2012-02-27 19:50:34 +09:00
parent 5a0661eae8
commit e0f6cc1814
2 changed files with 87 additions and 19 deletions

View File

@ -112,8 +112,10 @@ public class BinaryDictInputOutput {
*/
private static final int MAGIC_NUMBER = 0x78B1;
private static final int VERSION = 1;
private static final int MAXIMUM_SUPPORTED_VERSION = VERSION;
// Lowest and highest binary format versions that writeDictionaryBinary accepts; a request
// for any version outside this range is rejected with an UnsupportedFormatException.
private static final int MINIMUM_SUPPORTED_VERSION = 1;
private static final int MAXIMUM_SUPPORTED_VERSION = 2;
// Starting with this version, the options field is followed by a 3-byte header size.
private static final int FIRST_VERSION_WITH_HEADER_SIZE = 2;
// No options yet, reserved for future use.
private static final int OPTIONS = 0;
@ -797,9 +799,10 @@ public class BinaryDictInputOutput {
*
* @param destination the stream to write the binary data to.
* @param dict the dictionary to write.
* @param version the version of the format to write, currently either 1 or 2.
*/
public static void writeDictionaryBinary(OutputStream destination, FusionDictionary dict)
throws IOException {
public static void writeDictionaryBinary(OutputStream destination, FusionDictionary dict,
final int version) throws IOException, UnsupportedFormatException {
// Addresses are limited to 3 bytes, so we'll just make a 16MB buffer. Since addresses
// can be relative to each node, the structure itself is not limited to 16MB at all, but
@ -811,16 +814,30 @@ public class BinaryDictInputOutput {
final byte[] buffer = new byte[1 << 24];
int index = 0;
if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
throw new UnsupportedFormatException("Requested file format version " + version
+ ", but this implementation only supports versions "
+ MINIMUM_SUPPORTED_VERSION + " through " + MAXIMUM_SUPPORTED_VERSION);
}
// Magic number in big-endian order.
buffer[index++] = (byte) (0xFF & (MAGIC_NUMBER >> 8));
buffer[index++] = (byte) (0xFF & MAGIC_NUMBER);
// Dictionary version.
buffer[index++] = (byte) (0xFF & VERSION);
buffer[index++] = (byte) (0xFF & version);
// Options flags
buffer[index++] = (byte) (0xFF & (OPTIONS >> 8));
buffer[index++] = (byte) (0xFF & OPTIONS);
if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
final int headerSizeOffset = index;
index += 3; // Size of the header size
// Should we include the locale and title of the dictionary ?
// Write out the header contents here.
buffer[headerSizeOffset] = (byte) (0xFF & (index >> 16));
buffer[headerSizeOffset + 1] = (byte) (0xFF & (index >> 8));
buffer[headerSizeOffset + 2] = (byte) (0xFF & (index >> 0));
}
destination.write(buffer, 0, index);
index = 0;
@ -1125,7 +1142,16 @@ public class BinaryDictInputOutput {
// Read options
source.readUnsignedShort();
long headerSize = source.getFilePointer();
// Offset of the first byte after the header; passed on to readNode below.
final long headerSize;
if (version < FIRST_VERSION_WITH_HEADER_SIZE) {
    // No explicit size field: the header ends right after the options just read.
    headerSize = source.getFilePointer();
} else {
    // The header size is stored on 3 bytes, most significant byte first. Each shift has
    // to be parenthesized: in Java '+' binds tighter than '<<', so without parentheses
    // the expression would be evaluated as (b0 << (16 + b1)) << (8 + b2).
    headerSize = (source.readUnsignedByte() << 16) + (source.readUnsignedByte() << 8)
            + source.readUnsignedByte();
    // read the header body
    source.seek(headerSize);
}
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
final Node root = readNode(source, headerSize, reverseNodeMapping, reverseGroupMapping);

View File

@ -41,6 +41,7 @@ public class DictionaryMaker {
private final static String OPTION_INPUT_BIGRAM_XML = "-b";
private final static String OPTION_INPUT_SHORTCUT_XML = "-c";
private final static String OPTION_OUTPUT_BINARY = "-d";
private final static String OPTION_OUTPUT_BINARY_FORMAT_VERSION_1 = "-d1";
private final static String OPTION_OUTPUT_XML = "-x";
private final static String OPTION_HELP = "-h";
public final String mInputBinary;
@ -48,11 +49,27 @@ public class DictionaryMaker {
public final String mInputShortcutXml;
public final String mInputBigramXml;
public final String mOutputBinary;
public final String mOutputBinaryFormat1;
public final String mOutputXml;
private void checkIntegrity() {
/**
 * Validates the parsed arguments as a whole.
 *
 * Runs the input and output presence checks, then makes sure that no input path and no
 * other output path designates the same file as any given output.
 *
 * @throws IOException if checkNotSameFile fails to compare two of the paths.
 */
private void checkIntegrity() throws IOException {
    checkHasExactlyOneInput();
    checkHasAtLeastOneOutput();

    final String[] inputs =
            { mInputBinary, mInputUnigramXml, mInputShortcutXml, mInputBigramXml };
    final String[] outputs = { mOutputBinary, mOutputBinaryFormat1, mOutputXml };

    // An input must never double as an output.
    for (final String input : inputs) {
        for (final String output : outputs) {
            checkNotSameFile(input, output);
        }
    }
    // Two distinct outputs must never target the same file.
    for (int first = 0; first < outputs.length; ++first) {
        for (int second = first + 1; second < outputs.length; ++second) {
            checkNotSameFile(outputs[first], outputs[second]);
        }
    }
}
private void checkHasExactlyOneInput() {
@ -67,26 +84,40 @@ public class DictionaryMaker {
}
/**
 * Throws a RuntimeException with the message "No output specified" when every output
 * path tested below is null.
 */
private void checkHasAtLeastOneOutput() {
if (null == mOutputBinary && null == mOutputXml) {
if (null == mOutputBinary && null == mOutputBinaryFormat1 && null == mOutputXml) {
throw new RuntimeException("No output specified");
}
}
/**
 * Throws an exception if both paths designate the same file.
 *
 * The two paths are compared through their canonical form. Nothing is checked when
 * either path is null.
 *
 * @param path1 the first path to compare, may be null.
 * @param path2 the second path to compare, may be null.
 * @throws IOException if a canonical path cannot be computed.
 * @throws RuntimeException if both paths have the same canonical path.
 */
private static void checkNotSameFile(final String path1, final String path2)
        throws IOException {
    if (null != path1 && null != path2) {
        final String canonicalPath1 = new File(path1).getCanonicalPath();
        final String canonicalPath2 = new File(path2).getCanonicalPath();
        if (canonicalPath1.equals(canonicalPath2)) {
            throw new RuntimeException(path1 + " and " + path2 + " are the same file: "
                    + " refusing to process.");
        }
    }
}
/**
 * Logs the command-line usage text through MakedictLog.i.
 *
 * NOTE(review): the text states that the behavior is unspecified when the same file is
 * given for input and output or for several outputs, whereas checkIntegrity() calls
 * checkNotSameFile() on those pairs, which throws. Confirm whether the wording should
 * be updated.
 */
private void displayHelp() {
MakedictLog.i("Usage: makedict "
+ "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts.xml>] "
+ "| -s <binary input>] "
+ "[-d <binary output>] [-x <xml output>] [-2]\n"
+ "| -s <binary input>] [-d <binary output format version 2>] "
+ "[-d1 <binary output format version 1>] [-x <xml output>] [-2]\n"
+ "\n"
+ " Converts a source dictionary file to one or several outputs.\n"
+ " Source can be an XML file, with an optional XML bigrams file, or a\n"
+ " binary dictionary file.\n"
+ " Both binary and XML outputs are supported. Both can be output at\n"
+ " the same time but outputting several files of the same type is not\n"
+ " supported.");
+ " Binary version 1 (Ice Cream Sandwich), 2 (Jelly Bean) and XML outputs\n"
+ " are supported. All three can be output at the same time, but the same\n"
+ " output format cannot be specified several times. The behavior is\n"
+ " unspecified if the same file is specified for input and output, or for\n"
+ " several outputs.");
}
public Arguments(String[] argsArray) {
public Arguments(String[] argsArray) throws IOException {
final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray));
if (args.isEmpty()) {
displayHelp();
@ -96,6 +127,7 @@ public class DictionaryMaker {
String inputShortcutXml = null;
String inputBigramXml = null;
String outputBinary = null;
String outputBinaryFormat1 = null;
String outputXml = null;
while (!args.isEmpty()) {
@ -126,6 +158,8 @@ public class DictionaryMaker {
inputBigramXml = filename;
} else if (OPTION_OUTPUT_BINARY.equals(arg)) {
outputBinary = filename;
} else if (OPTION_OUTPUT_BINARY_FORMAT_VERSION_1.equals(arg)) {
outputBinaryFormat1 = filename;
} else if (OPTION_OUTPUT_XML.equals(arg)) {
outputXml = filename;
} else {
@ -152,6 +186,7 @@ public class DictionaryMaker {
mInputShortcutXml = inputShortcutXml;
mInputBigramXml = inputBigramXml;
mOutputBinary = outputBinary;
mOutputBinaryFormat1 = outputBinaryFormat1;
mOutputXml = outputXml;
checkIntegrity();
}
@ -231,9 +266,13 @@ public class DictionaryMaker {
* @throws IOException if one of the output files can't be written to.
*/
private static void writeOutputToParsedArgs(final Arguments args, final FusionDictionary dict)
throws FileNotFoundException, IOException {
throws FileNotFoundException, IOException, UnsupportedFormatException,
IllegalArgumentException {
if (null != args.mOutputBinary) {
writeBinaryDictionary(args.mOutputBinary, dict);
writeBinaryDictionary(args.mOutputBinary, dict, 2);
}
if (null != args.mOutputBinaryFormat1) {
writeBinaryDictionary(args.mOutputBinaryFormat1, dict, 1);
}
if (null != args.mOutputXml) {
writeXmlDictionary(args.mOutputXml, dict);
@ -245,13 +284,16 @@ public class DictionaryMaker {
*
* @param outputFilename the name of the file to write to.
* @param dict the dictionary to write.
* @param version the binary format version to use.
* @throws FileNotFoundException if the output file can't be created.
* @throws IOException if the output file can't be written to.
*/
private static void writeBinaryDictionary(final String outputFilename,
final FusionDictionary dict) throws FileNotFoundException, IOException {
final FusionDictionary dict, final int version)
throws FileNotFoundException, IOException, UnsupportedFormatException {
final File outputFile = new File(outputFilename);
BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict);
BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict,
version);
}
/**