Merge "Make makedict able to write binary format versions 1 and 2"
This commit is contained in:
commit
36aa8e39b5
2 changed files with 87 additions and 19 deletions
|
@ -112,8 +112,10 @@ public class BinaryDictInputOutput {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
private static final int MAGIC_NUMBER = 0x78B1;
|
private static final int MAGIC_NUMBER = 0x78B1;
|
||||||
private static final int VERSION = 1;
|
private static final int MINIMUM_SUPPORTED_VERSION = 1;
|
||||||
private static final int MAXIMUM_SUPPORTED_VERSION = VERSION;
|
private static final int MAXIMUM_SUPPORTED_VERSION = 2;
|
||||||
|
private static final int FIRST_VERSION_WITH_HEADER_SIZE = 2;
|
||||||
|
|
||||||
// No options yet, reserved for future use.
|
// No options yet, reserved for future use.
|
||||||
private static final int OPTIONS = 0;
|
private static final int OPTIONS = 0;
|
||||||
|
|
||||||
|
@ -797,9 +799,10 @@ public class BinaryDictInputOutput {
|
||||||
*
|
*
|
||||||
* @param destination the stream to write the binary data to.
|
* @param destination the stream to write the binary data to.
|
||||||
* @param dict the dictionary to write.
|
* @param dict the dictionary to write.
|
||||||
|
* @param version the version of the format to write, currently either 1 or 2.
|
||||||
*/
|
*/
|
||||||
public static void writeDictionaryBinary(OutputStream destination, FusionDictionary dict)
|
public static void writeDictionaryBinary(OutputStream destination, FusionDictionary dict,
|
||||||
throws IOException {
|
final int version) throws IOException, UnsupportedFormatException {
|
||||||
|
|
||||||
// Addresses are limited to 3 bytes, so we'll just make a 16MB buffer. Since addresses
|
// Addresses are limited to 3 bytes, so we'll just make a 16MB buffer. Since addresses
|
||||||
// can be relative to each node, the structure itself is not limited to 16MB at all, but
|
// can be relative to each node, the structure itself is not limited to 16MB at all, but
|
||||||
|
@ -811,16 +814,30 @@ public class BinaryDictInputOutput {
|
||||||
final byte[] buffer = new byte[1 << 24];
|
final byte[] buffer = new byte[1 << 24];
|
||||||
int index = 0;
|
int index = 0;
|
||||||
|
|
||||||
|
if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
|
||||||
|
throw new UnsupportedFormatException("Requested file format version " + version
|
||||||
|
+ ", but this implementation only supports versions "
|
||||||
|
+ MINIMUM_SUPPORTED_VERSION + " through " + MAXIMUM_SUPPORTED_VERSION);
|
||||||
|
}
|
||||||
|
|
||||||
// Magic number in big-endian order.
|
// Magic number in big-endian order.
|
||||||
buffer[index++] = (byte) (0xFF & (MAGIC_NUMBER >> 8));
|
buffer[index++] = (byte) (0xFF & (MAGIC_NUMBER >> 8));
|
||||||
buffer[index++] = (byte) (0xFF & MAGIC_NUMBER);
|
buffer[index++] = (byte) (0xFF & MAGIC_NUMBER);
|
||||||
// Dictionary version.
|
// Dictionary version.
|
||||||
buffer[index++] = (byte) (0xFF & VERSION);
|
buffer[index++] = (byte) (0xFF & version);
|
||||||
// Options flags
|
// Options flags
|
||||||
buffer[index++] = (byte) (0xFF & (OPTIONS >> 8));
|
buffer[index++] = (byte) (0xFF & (OPTIONS >> 8));
|
||||||
buffer[index++] = (byte) (0xFF & OPTIONS);
|
buffer[index++] = (byte) (0xFF & OPTIONS);
|
||||||
|
if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
|
||||||
|
final int headerSizeOffset = index;
|
||||||
|
index += 3; // Size of the header size
|
||||||
|
|
||||||
// Should we include the locale and title of the dictionary ?
|
// Write out the header contents here.
|
||||||
|
|
||||||
|
buffer[headerSizeOffset] = (byte) (0xFF & (index >> 16));
|
||||||
|
buffer[headerSizeOffset + 1] = (byte) (0xFF & (index >> 8));
|
||||||
|
buffer[headerSizeOffset + 2] = (byte) (0xFF & (index >> 0));
|
||||||
|
}
|
||||||
|
|
||||||
destination.write(buffer, 0, index);
|
destination.write(buffer, 0, index);
|
||||||
index = 0;
|
index = 0;
|
||||||
|
@ -1125,7 +1142,16 @@ public class BinaryDictInputOutput {
|
||||||
// Read options
|
// Read options
|
||||||
source.readUnsignedShort();
|
source.readUnsignedShort();
|
||||||
|
|
||||||
long headerSize = source.getFilePointer();
|
// Offset at which the node data starts; it is handed to readNode() below.
// Formats older than FIRST_VERSION_WITH_HEADER_SIZE carry no header size field, so
// the current file position (just past the options) is used as-is.
final long headerSize;
if (version < FIRST_VERSION_WITH_HEADER_SIZE) {
    headerSize = source.getFilePointer();
} else {
    // The header size is stored on 3 bytes, most significant byte first.
    // Each shift must be parenthesized: in Java `+` binds tighter than `<<`, so the
    // unparenthesized form evaluates as a << (16 + b) << (8 + c) and yields garbage.
    // Operands are evaluated left to right, so the bytes are still read in file order.
    headerSize = (source.readUnsignedByte() << 16)
            | (source.readUnsignedByte() << 8)
            | source.readUnsignedByte();
    // The header body is not parsed here; jump straight to the offset stored in the
    // header size field.
    source.seek(headerSize);
}
|
||||||
|
|
||||||
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
|
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
|
||||||
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
||||||
final Node root = readNode(source, headerSize, reverseNodeMapping, reverseGroupMapping);
|
final Node root = readNode(source, headerSize, reverseNodeMapping, reverseGroupMapping);
|
||||||
|
|
|
@ -41,6 +41,7 @@ public class DictionaryMaker {
|
||||||
private final static String OPTION_INPUT_BIGRAM_XML = "-b";
|
private final static String OPTION_INPUT_BIGRAM_XML = "-b";
|
||||||
private final static String OPTION_INPUT_SHORTCUT_XML = "-c";
|
private final static String OPTION_INPUT_SHORTCUT_XML = "-c";
|
||||||
private final static String OPTION_OUTPUT_BINARY = "-d";
|
private final static String OPTION_OUTPUT_BINARY = "-d";
|
||||||
|
private final static String OPTION_OUTPUT_BINARY_FORMAT_VERSION_1 = "-d1";
|
||||||
private final static String OPTION_OUTPUT_XML = "-x";
|
private final static String OPTION_OUTPUT_XML = "-x";
|
||||||
private final static String OPTION_HELP = "-h";
|
private final static String OPTION_HELP = "-h";
|
||||||
public final String mInputBinary;
|
public final String mInputBinary;
|
||||||
|
@ -48,11 +49,27 @@ public class DictionaryMaker {
|
||||||
public final String mInputShortcutXml;
|
public final String mInputShortcutXml;
|
||||||
public final String mInputBigramXml;
|
public final String mInputBigramXml;
|
||||||
public final String mOutputBinary;
|
public final String mOutputBinary;
|
||||||
|
public final String mOutputBinaryFormat1;
|
||||||
public final String mOutputXml;
|
public final String mOutputXml;
|
||||||
|
|
||||||
private void checkIntegrity() {
|
private void checkIntegrity() throws IOException {
|
||||||
checkHasExactlyOneInput();
|
checkHasExactlyOneInput();
|
||||||
checkHasAtLeastOneOutput();
|
checkHasAtLeastOneOutput();
|
||||||
|
checkNotSameFile(mInputBinary, mOutputBinary);
|
||||||
|
checkNotSameFile(mInputBinary, mOutputBinaryFormat1);
|
||||||
|
checkNotSameFile(mInputBinary, mOutputXml);
|
||||||
|
checkNotSameFile(mInputUnigramXml, mOutputBinary);
|
||||||
|
checkNotSameFile(mInputUnigramXml, mOutputBinaryFormat1);
|
||||||
|
checkNotSameFile(mInputUnigramXml, mOutputXml);
|
||||||
|
checkNotSameFile(mInputShortcutXml, mOutputBinary);
|
||||||
|
checkNotSameFile(mInputShortcutXml, mOutputBinaryFormat1);
|
||||||
|
checkNotSameFile(mInputShortcutXml, mOutputXml);
|
||||||
|
checkNotSameFile(mInputBigramXml, mOutputBinary);
|
||||||
|
checkNotSameFile(mInputBigramXml, mOutputBinaryFormat1);
|
||||||
|
checkNotSameFile(mInputBigramXml, mOutputXml);
|
||||||
|
checkNotSameFile(mOutputBinary, mOutputBinaryFormat1);
|
||||||
|
checkNotSameFile(mOutputBinary, mOutputXml);
|
||||||
|
checkNotSameFile(mOutputBinaryFormat1, mOutputXml);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkHasExactlyOneInput() {
|
private void checkHasExactlyOneInput() {
|
||||||
|
@ -67,26 +84,40 @@ public class DictionaryMaker {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkHasAtLeastOneOutput() {
|
private void checkHasAtLeastOneOutput() {
|
||||||
if (null == mOutputBinary && null == mOutputXml) {
|
if (null == mOutputBinary && null == mOutputBinaryFormat1 && null == mOutputXml) {
|
||||||
throw new RuntimeException("No output specified");
|
throw new RuntimeException("No output specified");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Utility method that throws an exception if path1 and path2 point to the same file.
|
||||||
|
*/
|
||||||
|
private static void checkNotSameFile(final String path1, final String path2)
|
||||||
|
throws IOException {
|
||||||
|
if (null == path1 || null == path2) return;
|
||||||
|
if (new File(path1).getCanonicalPath().equals(new File(path2).getCanonicalPath())) {
|
||||||
|
throw new RuntimeException(path1 + " and " + path2 + " are the same file: "
|
||||||
|
+ " refusing to process.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void displayHelp() {
|
private void displayHelp() {
|
||||||
MakedictLog.i("Usage: makedict "
|
MakedictLog.i("Usage: makedict "
|
||||||
+ "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts.xml>] "
|
+ "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts.xml>] "
|
||||||
+ "| -s <binary input>] "
|
+ "| -s <binary input>] [-d <binary output format version 2>] "
|
||||||
+ "[-d <binary output>] [-x <xml output>] [-2]\n"
|
+ "[-d1 <binary output format version 1>] [-x <xml output>] [-2]\n"
|
||||||
+ "\n"
|
+ "\n"
|
||||||
+ " Converts a source dictionary file to one or several outputs.\n"
|
+ " Converts a source dictionary file to one or several outputs.\n"
|
||||||
+ " Source can be an XML file, with an optional XML bigrams file, or a\n"
|
+ " Source can be an XML file, with an optional XML bigrams file, or a\n"
|
||||||
+ " binary dictionary file.\n"
|
+ " binary dictionary file.\n"
|
||||||
+ " Both binary and XML outputs are supported. Both can be output at\n"
|
+ " Binary version 1 (Ice Cream Sandwich), 2 (Jelly Bean) and XML outputs\n"
|
||||||
+ " the same time but outputting several files of the same type is not\n"
|
+ " are supported. All three can be output at the same time, but the same\n"
|
||||||
+ " supported.");
|
+ " output format cannot be specified several times. The behavior is\n"
|
||||||
|
+ " unspecified if the same file is specified for input and output, or for\n"
|
||||||
|
+ " several outputs.");
|
||||||
}
|
}
|
||||||
|
|
||||||
public Arguments(String[] argsArray) {
|
public Arguments(String[] argsArray) throws IOException {
|
||||||
final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray));
|
final LinkedList<String> args = new LinkedList<String>(Arrays.asList(argsArray));
|
||||||
if (args.isEmpty()) {
|
if (args.isEmpty()) {
|
||||||
displayHelp();
|
displayHelp();
|
||||||
|
@ -96,6 +127,7 @@ public class DictionaryMaker {
|
||||||
String inputShortcutXml = null;
|
String inputShortcutXml = null;
|
||||||
String inputBigramXml = null;
|
String inputBigramXml = null;
|
||||||
String outputBinary = null;
|
String outputBinary = null;
|
||||||
|
String outputBinaryFormat1 = null;
|
||||||
String outputXml = null;
|
String outputXml = null;
|
||||||
|
|
||||||
while (!args.isEmpty()) {
|
while (!args.isEmpty()) {
|
||||||
|
@ -126,6 +158,8 @@ public class DictionaryMaker {
|
||||||
inputBigramXml = filename;
|
inputBigramXml = filename;
|
||||||
} else if (OPTION_OUTPUT_BINARY.equals(arg)) {
|
} else if (OPTION_OUTPUT_BINARY.equals(arg)) {
|
||||||
outputBinary = filename;
|
outputBinary = filename;
|
||||||
|
} else if (OPTION_OUTPUT_BINARY_FORMAT_VERSION_1.equals(arg)) {
|
||||||
|
outputBinaryFormat1 = filename;
|
||||||
} else if (OPTION_OUTPUT_XML.equals(arg)) {
|
} else if (OPTION_OUTPUT_XML.equals(arg)) {
|
||||||
outputXml = filename;
|
outputXml = filename;
|
||||||
} else {
|
} else {
|
||||||
|
@ -152,6 +186,7 @@ public class DictionaryMaker {
|
||||||
mInputShortcutXml = inputShortcutXml;
|
mInputShortcutXml = inputShortcutXml;
|
||||||
mInputBigramXml = inputBigramXml;
|
mInputBigramXml = inputBigramXml;
|
||||||
mOutputBinary = outputBinary;
|
mOutputBinary = outputBinary;
|
||||||
|
mOutputBinaryFormat1 = outputBinaryFormat1;
|
||||||
mOutputXml = outputXml;
|
mOutputXml = outputXml;
|
||||||
checkIntegrity();
|
checkIntegrity();
|
||||||
}
|
}
|
||||||
|
@ -231,9 +266,13 @@ public class DictionaryMaker {
|
||||||
* @throws IOException if one of the output files can't be written to.
|
* @throws IOException if one of the output files can't be written to.
|
||||||
*/
|
*/
|
||||||
private static void writeOutputToParsedArgs(final Arguments args, final FusionDictionary dict)
|
private static void writeOutputToParsedArgs(final Arguments args, final FusionDictionary dict)
|
||||||
throws FileNotFoundException, IOException {
|
throws FileNotFoundException, IOException, UnsupportedFormatException,
|
||||||
|
IllegalArgumentException {
|
||||||
if (null != args.mOutputBinary) {
|
if (null != args.mOutputBinary) {
|
||||||
writeBinaryDictionary(args.mOutputBinary, dict);
|
writeBinaryDictionary(args.mOutputBinary, dict, 2);
|
||||||
|
}
|
||||||
|
if (null != args.mOutputBinaryFormat1) {
|
||||||
|
writeBinaryDictionary(args.mOutputBinaryFormat1, dict, 1);
|
||||||
}
|
}
|
||||||
if (null != args.mOutputXml) {
|
if (null != args.mOutputXml) {
|
||||||
writeXmlDictionary(args.mOutputXml, dict);
|
writeXmlDictionary(args.mOutputXml, dict);
|
||||||
|
@ -245,13 +284,16 @@ public class DictionaryMaker {
|
||||||
*
|
*
|
||||||
* @param outputFilename the name of the file to write to.
|
* @param outputFilename the name of the file to write to.
|
||||||
* @param dict the dictionary to write.
|
* @param dict the dictionary to write.
|
||||||
|
* @param version the binary format version to use.
|
||||||
* @throws FileNotFoundException if the output file can't be created.
|
* @throws FileNotFoundException if the output file can't be created.
|
||||||
* @throws IOException if the output file can't be written to.
|
* @throws IOException if the output file can't be written to.
|
||||||
*/
|
*/
|
||||||
private static void writeBinaryDictionary(final String outputFilename,
|
private static void writeBinaryDictionary(final String outputFilename,
|
||||||
final FusionDictionary dict) throws FileNotFoundException, IOException {
|
final FusionDictionary dict, final int version)
|
||||||
|
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
||||||
final File outputFile = new File(outputFilename);
|
final File outputFile = new File(outputFilename);
|
||||||
BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict);
|
BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict,
|
||||||
|
version);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in a new issue