Merge "Make makedict able to write binary format versions 1 and 2"
This commit is contained in:
commit
36aa8e39b5
2 changed files with 87 additions and 19 deletions
|
@ -112,8 +112,10 @@ public class BinaryDictInputOutput {
|
|||
*/
|
||||
|
||||
private static final int MAGIC_NUMBER = 0x78B1;
|
||||
private static final int VERSION = 1;
|
||||
private static final int MAXIMUM_SUPPORTED_VERSION = VERSION;
|
||||
private static final int MINIMUM_SUPPORTED_VERSION = 1;
|
||||
private static final int MAXIMUM_SUPPORTED_VERSION = 2;
|
||||
private static final int FIRST_VERSION_WITH_HEADER_SIZE = 2;
|
||||
|
||||
// No options yet, reserved for future use.
|
||||
private static final int OPTIONS = 0;
|
||||
|
||||
|
@ -797,9 +799,10 @@ public class BinaryDictInputOutput {
|
|||
*
|
||||
* @param destination the stream to write the binary data to.
|
||||
* @param dict the dictionary to write.
|
||||
* @param version the version of the format to write, currently either 1 or 2.
|
||||
*/
|
||||
public static void writeDictionaryBinary(OutputStream destination, FusionDictionary dict)
|
||||
throws IOException {
|
||||
public static void writeDictionaryBinary(OutputStream destination, FusionDictionary dict,
|
||||
final int version) throws IOException, UnsupportedFormatException {
|
||||
|
||||
// Addresses are limited to 3 bytes, so we'll just make a 16MB buffer. Since addresses
|
||||
// can be relative to each node, the structure itself is not limited to 16MB at all, but
|
||||
|
@ -811,16 +814,30 @@ public class BinaryDictInputOutput {
|
|||
final byte[] buffer = new byte[1 << 24];
|
||||
int index = 0;
|
||||
|
||||
if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
|
||||
throw new UnsupportedFormatException("Requested file format version " + version
|
||||
+ ", but this implementation only supports versions "
|
||||
+ MINIMUM_SUPPORTED_VERSION + " through " + MAXIMUM_SUPPORTED_VERSION);
|
||||
}
|
||||
|
||||
// Magic number in big-endian order.
|
||||
buffer[index++] = (byte) (0xFF & (MAGIC_NUMBER >> 8));
|
||||
buffer[index++] = (byte) (0xFF & MAGIC_NUMBER);
|
||||
// Dictionary version.
|
||||
buffer[index++] = (byte) (0xFF & VERSION);
|
||||
buffer[index++] = (byte) (0xFF & version);
|
||||
// Options flags
|
||||
buffer[index++] = (byte) (0xFF & (OPTIONS >> 8));
|
||||
buffer[index++] = (byte) (0xFF & OPTIONS);
|
||||
if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
|
||||
final int headerSizeOffset = index;
|
||||
index += 3; // Size of the header size
|
||||
|
||||
// Should we include the locale and title of the dictionary ?
|
||||
// Write out the header contents here.
|
||||
|
||||
buffer[headerSizeOffset] = (byte) (0xFF & (index >> 16));
|
||||
buffer[headerSizeOffset + 1] = (byte) (0xFF & (index >> 8));
|
||||
buffer[headerSizeOffset + 2] = (byte) (0xFF & (index >> 0));
|
||||
}
|
||||
|
||||
destination.write(buffer, 0, index);
|
||||
index = 0;
|
||||
|
@ -1125,7 +1142,16 @@ public class BinaryDictInputOutput {
|
|||
// Read options
|
||||
source.readUnsignedShort();
|
||||
|
||||
// Work out where the header ends. Format versions older than
// FIRST_VERSION_WITH_HEADER_SIZE carry no header-size field, so the header ends at the
// current position, right after the options. Newer versions store the header size in the
// next three bytes, most significant byte first.
final long headerSize;
if (version < FIRST_VERSION_WITH_HEADER_SIZE) {
    headerSize = source.getFilePointer();
} else {
    // Each shift must be parenthesized: '+' binds tighter than '<<', so the unparenthesized
    // form "a << 16 + b << 8 + c" is evaluated as "(a << (16 + b)) << (8 + c)".
    // Operands are evaluated left to right, so the bytes are still read in file order.
    headerSize = (source.readUnsignedByte() << 16)
            | (source.readUnsignedByte() << 8)
            | source.readUnsignedByte();
    // read the header body
    source.seek(headerSize);
}
|
||||
|
||||
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
|
||||
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
||||
final Node root = readNode(source, headerSize, reverseNodeMapping, reverseGroupMapping);
|
||||
|
|
|
@ -41,6 +41,7 @@ public class DictionaryMaker {
|
|||
private final static String OPTION_INPUT_BIGRAM_XML = "-b";
|
||||
private final static String OPTION_INPUT_SHORTCUT_XML = "-c";
|
||||
private final static String OPTION_OUTPUT_BINARY = "-d";
|
||||
private final static String OPTION_OUTPUT_BINARY_FORMAT_VERSION_1 = "-d1";
|
||||
private final static String OPTION_OUTPUT_XML = "-x";
|
||||
private final static String OPTION_HELP = "-h";
|
||||
public final String mInputBinary;
|
||||
|
@ -48,11 +49,27 @@ public class DictionaryMaker {
|
|||
public final String mInputShortcutXml;
|
||||
public final String mInputBigramXml;
|
||||
public final String mOutputBinary;
|
||||
public final String mOutputBinaryFormat1;
|
||||
public final String mOutputXml;
|
||||
|
||||
private void checkIntegrity() {
|
||||
private void checkIntegrity() throws IOException {
|
||||
checkHasExactlyOneInput();
|
||||
checkHasAtLeastOneOutput();
|
||||
checkNotSameFile(mInputBinary, mOutputBinary);
|
||||
checkNotSameFile(mInputBinary, mOutputBinaryFormat1);
|
||||
checkNotSameFile(mInputBinary, mOutputXml);
|
||||
checkNotSameFile(mInputUnigramXml, mOutputBinary);
|
||||
checkNotSameFile(mInputUnigramXml, mOutputBinaryFormat1);
|
||||
checkNotSameFile(mInputUnigramXml, mOutputXml);
|
||||
checkNotSameFile(mInputShortcutXml, mOutputBinary);
|
||||
checkNotSameFile(mInputShortcutXml, mOutputBinaryFormat1);
|
||||
checkNotSameFile(mInputShortcutXml, mOutputXml);
|
||||
checkNotSameFile(mInputBigramXml, mOutputBinary);
|
||||
checkNotSameFile(mInputBigramXml, mOutputBinaryFormat1);
|
||||
checkNotSameFile(mInputBigramXml, mOutputXml);
|
||||
checkNotSameFile(mOutputBinary, mOutputBinaryFormat1);
|
||||
checkNotSameFile(mOutputBinary, mOutputXml);
|
||||
checkNotSameFile(mOutputBinaryFormat1, mOutputXml);
|
||||
}
|
||||
|
||||
private void checkHasExactlyOneInput() {
|
||||
|
@ -67,26 +84,40 @@ public class DictionaryMaker {
|
|||
}
|
||||
|
||||
private void checkHasAtLeastOneOutput() {
|
||||
if (null == mOutputBinary && null == mOutputXml) {
|
||||
if (null == mOutputBinary && null == mOutputBinaryFormat1 && null == mOutputXml) {
|
||||
throw new RuntimeException("No output specified");
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Utility method that throws an exception if path1 and path2 point to the same file.
|
||||
*/
|
||||
private static void checkNotSameFile(final String path1, final String path2)
|
||||
throws IOException {
|
||||
if (null == path1 || null == path2) return;
|
||||
if (new File(path1).getCanonicalPath().equals(new File(path2).getCanonicalPath())) {
|
||||
throw new RuntimeException(path1 + " and " + path2 + " are the same file: "
|
||||
+ " refusing to process.");
|
||||
}
|
||||
}
|
||||
|
||||
private void displayHelp() {
|
||||
MakedictLog.i("Usage: makedict "
|
||||
+ "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts.xml>] "
|
||||
+ "| -s <binary input>] "
|
||||
+ "[-d <binary output>] [-x <xml output>] [-2]\n"
|
||||
+ "| -s <binary input>] [-d <binary output format version 2>] "
|
||||
+ "[-d1 <binary output format version 1>] [-x <xml output>] [-2]\n"
|
||||
+ "\n"
|
||||
+ " Converts a source dictionary file to one or several outputs.\n"
|
||||
+ " Source can be an XML file, with an optional XML bigrams file, or a\n"
|
||||
+ " binary dictionary file.\n"
|
||||
+ " Both binary and XML outputs are supported. Both can be output at\n"
|
||||
+ " the same time but outputting several files of the same type is not\n"
|
||||
+ " supported.");
|
||||
+ " Binary version 1 (Ice Cream Sandwich), 2 (Jelly Bean) and XML outputs\n"
|
||||
+ " are supported. All three can be output at the same time, but the same\n"
|
||||
+ " output format cannot be specified several times. The behavior is\n"
|
||||
+ " unspecified if the same file is specified for input and output, or for\n"
|
||||
+ " several outputs.");
|
||||
}
|
||||
|
||||
public Arguments(String[] argsArray) {
|
||||
public Arguments(String[] argsArray) throws IOException {
|
||||
final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray));
|
||||
if (args.isEmpty()) {
|
||||
displayHelp();
|
||||
|
@ -96,6 +127,7 @@ public class DictionaryMaker {
|
|||
String inputShortcutXml = null;
|
||||
String inputBigramXml = null;
|
||||
String outputBinary = null;
|
||||
String outputBinaryFormat1 = null;
|
||||
String outputXml = null;
|
||||
|
||||
while (!args.isEmpty()) {
|
||||
|
@ -126,6 +158,8 @@ public class DictionaryMaker {
|
|||
inputBigramXml = filename;
|
||||
} else if (OPTION_OUTPUT_BINARY.equals(arg)) {
|
||||
outputBinary = filename;
|
||||
} else if (OPTION_OUTPUT_BINARY_FORMAT_VERSION_1.equals(arg)) {
|
||||
outputBinaryFormat1 = filename;
|
||||
} else if (OPTION_OUTPUT_XML.equals(arg)) {
|
||||
outputXml = filename;
|
||||
} else {
|
||||
|
@ -152,6 +186,7 @@ public class DictionaryMaker {
|
|||
mInputShortcutXml = inputShortcutXml;
|
||||
mInputBigramXml = inputBigramXml;
|
||||
mOutputBinary = outputBinary;
|
||||
mOutputBinaryFormat1 = outputBinaryFormat1;
|
||||
mOutputXml = outputXml;
|
||||
checkIntegrity();
|
||||
}
|
||||
|
@ -231,9 +266,13 @@ public class DictionaryMaker {
|
|||
* @throws IOException if one of the output files can't be written to.
|
||||
*/
|
||||
private static void writeOutputToParsedArgs(final Arguments args, final FusionDictionary dict)
|
||||
throws FileNotFoundException, IOException {
|
||||
throws FileNotFoundException, IOException, UnsupportedFormatException,
|
||||
IllegalArgumentException {
|
||||
if (null != args.mOutputBinary) {
|
||||
writeBinaryDictionary(args.mOutputBinary, dict);
|
||||
writeBinaryDictionary(args.mOutputBinary, dict, 2);
|
||||
}
|
||||
if (null != args.mOutputBinaryFormat1) {
|
||||
writeBinaryDictionary(args.mOutputBinaryFormat1, dict, 1);
|
||||
}
|
||||
if (null != args.mOutputXml) {
|
||||
writeXmlDictionary(args.mOutputXml, dict);
|
||||
|
@ -245,13 +284,16 @@ public class DictionaryMaker {
|
|||
*
|
||||
* @param outputFilename the name of the file to write to.
|
||||
* @param dict the dictionary to write.
|
||||
* @param version the binary format version to use.
|
||||
* @throws FileNotFoundException if the output file can't be created.
|
||||
* @throws IOException if the output file can't be written to.
|
||||
*/
|
||||
private static void writeBinaryDictionary(final String outputFilename,
|
||||
final FusionDictionary dict) throws FileNotFoundException, IOException {
|
||||
final FusionDictionary dict, final int version)
|
||||
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
||||
final File outputFile = new File(outputFilename);
|
||||
BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict);
|
||||
BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict,
|
||||
version);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in a new issue