am fd4cb4bd
: Merge "Read dicttool option for switching code point table"
* commit 'fd4cb4bd050c6906c55780f92338a62b8efd70ad': Read dicttool option for switching code point table
This commit is contained in:
commit
b2f45b28d5
3 changed files with 43 additions and 19 deletions
|
@ -69,7 +69,7 @@ public class BinaryDictUtils {
|
||||||
}
|
}
|
||||||
return new Ver4DictEncoder(file);
|
return new Ver4DictEncoder(file);
|
||||||
} else if (formatOptions.mVersion == FormatSpec.VERSION2) {
|
} else if (formatOptions.mVersion == FormatSpec.VERSION2) {
|
||||||
return new Ver2DictEncoder(file);
|
return new Ver2DictEncoder(file, Ver2DictEncoder.CODE_POINT_TABLE_OFF);
|
||||||
} else {
|
} else {
|
||||||
throw new RuntimeException("The format option has a wrong version : "
|
throw new RuntimeException("The format option has a wrong version : "
|
||||||
+ formatOptions.mVersion);
|
+ formatOptions.mVersion);
|
||||||
|
|
|
@ -40,12 +40,16 @@ public class Ver2DictEncoder implements DictEncoder {
|
||||||
private OutputStream mOutStream;
|
private OutputStream mOutStream;
|
||||||
private byte[] mBuffer;
|
private byte[] mBuffer;
|
||||||
private int mPosition;
|
private int mPosition;
|
||||||
|
private final int mCodePointTableMode;
|
||||||
|
public static final int CODE_POINT_TABLE_OFF = 0;
|
||||||
|
public static final int CODE_POINT_TABLE_ON = 1;
|
||||||
|
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public Ver2DictEncoder(final File dictFile) {
|
public Ver2DictEncoder(final File dictFile, final int codePointTableMode) {
|
||||||
mDictFile = dictFile;
|
mDictFile = dictFile;
|
||||||
mOutStream = null;
|
mOutStream = null;
|
||||||
mBuffer = null;
|
mBuffer = null;
|
||||||
|
mCodePointTableMode = codePointTableMode;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This constructor is used only by BinaryDictOffdeviceUtilsTests.
|
// This constructor is used only by BinaryDictOffdeviceUtilsTests.
|
||||||
|
@ -55,6 +59,7 @@ public class Ver2DictEncoder implements DictEncoder {
|
||||||
public Ver2DictEncoder(final OutputStream outStream) {
|
public Ver2DictEncoder(final OutputStream outStream) {
|
||||||
mDictFile = null;
|
mDictFile = null;
|
||||||
mOutStream = outStream;
|
mOutStream = outStream;
|
||||||
|
mCodePointTableMode = CODE_POINT_TABLE_OFF;
|
||||||
}
|
}
|
||||||
|
|
||||||
private void openStream() throws FileNotFoundException {
|
private void openStream() throws FileNotFoundException {
|
||||||
|
|
|
@ -59,6 +59,9 @@ public class DictionaryMaker {
|
||||||
private static final String OPTION_OUTPUT_XML = "-x";
|
private static final String OPTION_OUTPUT_XML = "-x";
|
||||||
private static final String OPTION_OUTPUT_COMBINED = "-o";
|
private static final String OPTION_OUTPUT_COMBINED = "-o";
|
||||||
private static final String OPTION_HELP = "-h";
|
private static final String OPTION_HELP = "-h";
|
||||||
|
private static final String OPTION_CODE_POINT_TABLE = "-t";
|
||||||
|
private static final String OPTION_CODE_POINT_TABLE_OFF = "off";
|
||||||
|
private static final String OPTION_CODE_POINT_TABLE_ON = "on";
|
||||||
public final String mInputBinary;
|
public final String mInputBinary;
|
||||||
public final String mInputCombined;
|
public final String mInputCombined;
|
||||||
public final String mInputUnigramXml;
|
public final String mInputUnigramXml;
|
||||||
|
@ -68,6 +71,7 @@ public class DictionaryMaker {
|
||||||
public final String mOutputXml;
|
public final String mOutputXml;
|
||||||
public final String mOutputCombined;
|
public final String mOutputCombined;
|
||||||
public final int mOutputBinaryFormatVersion;
|
public final int mOutputBinaryFormatVersion;
|
||||||
|
public final int mCodePointTableMode;
|
||||||
|
|
||||||
private void checkIntegrity() throws IOException {
|
private void checkIntegrity() throws IOException {
|
||||||
checkHasExactlyOneInput();
|
checkHasExactlyOneInput();
|
||||||
|
@ -131,7 +135,7 @@ public class DictionaryMaker {
|
||||||
+ "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts_and_whitelist.xml>] "
|
+ "[-s <unigrams.xml> [-b <bigrams.xml>] [-c <shortcuts_and_whitelist.xml>] "
|
||||||
+ "| [-s <combined format input]"
|
+ "| [-s <combined format input]"
|
||||||
+ "| [-s <binary input>] [-d <binary output>] [-x <xml output>] "
|
+ "| [-s <binary input>] [-d <binary output>] [-x <xml output>] "
|
||||||
+ " [-o <combined output>]"
|
+ " [-o <combined output>] [-t <code point table switch: on/off/auto>]"
|
||||||
+ "[-2] [-3] [-4]\n"
|
+ "[-2] [-3] [-4]\n"
|
||||||
+ "\n"
|
+ "\n"
|
||||||
+ " Converts a source dictionary file to one or several outputs.\n"
|
+ " Converts a source dictionary file to one or several outputs.\n"
|
||||||
|
@ -155,6 +159,8 @@ public class DictionaryMaker {
|
||||||
String outputXml = null;
|
String outputXml = null;
|
||||||
String outputCombined = null;
|
String outputCombined = null;
|
||||||
int outputBinaryFormatVersion = 2; // the default version is 2.
|
int outputBinaryFormatVersion = 2; // the default version is 2.
|
||||||
|
// Don't use code point table by default.
|
||||||
|
int codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF;
|
||||||
|
|
||||||
while (!args.isEmpty()) {
|
while (!args.isEmpty()) {
|
||||||
final String arg = args.get(0);
|
final String arg = args.get(0);
|
||||||
|
@ -172,29 +178,38 @@ public class DictionaryMaker {
|
||||||
throw new IllegalArgumentException("Option " + arg + " is unknown or "
|
throw new IllegalArgumentException("Option " + arg + " is unknown or "
|
||||||
+ "requires an argument");
|
+ "requires an argument");
|
||||||
}
|
}
|
||||||
String filename = args.get(0);
|
String argValue = args.get(0);
|
||||||
args.remove(0);
|
args.remove(0);
|
||||||
if (OPTION_INPUT_SOURCE.equals(arg)) {
|
if (OPTION_INPUT_SOURCE.equals(arg)) {
|
||||||
if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) {
|
if (XmlDictInputOutput.isXmlUnigramDictionary(argValue)) {
|
||||||
inputUnigramXml = filename;
|
inputUnigramXml = argValue;
|
||||||
} else if (CombinedInputOutput.isCombinedDictionary(filename)) {
|
} else if (CombinedInputOutput.isCombinedDictionary(argValue)) {
|
||||||
inputCombined = filename;
|
inputCombined = argValue;
|
||||||
} else if (BinaryDictDecoderUtils.isBinaryDictionary(filename)) {
|
} else if (BinaryDictDecoderUtils.isBinaryDictionary(argValue)) {
|
||||||
inputBinary = filename;
|
inputBinary = argValue;
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
"Unknown format for file " + filename);
|
"Unknown format for file " + argValue);
|
||||||
}
|
}
|
||||||
} else if (OPTION_INPUT_SHORTCUT_XML.equals(arg)) {
|
} else if (OPTION_INPUT_SHORTCUT_XML.equals(arg)) {
|
||||||
inputShortcutXml = filename;
|
inputShortcutXml = argValue;
|
||||||
} else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) {
|
} else if (OPTION_INPUT_BIGRAM_XML.equals(arg)) {
|
||||||
inputBigramXml = filename;
|
inputBigramXml = argValue;
|
||||||
} else if (OPTION_OUTPUT_BINARY.equals(arg)) {
|
} else if (OPTION_OUTPUT_BINARY.equals(arg)) {
|
||||||
outputBinary = filename;
|
outputBinary = argValue;
|
||||||
} else if (OPTION_OUTPUT_XML.equals(arg)) {
|
} else if (OPTION_OUTPUT_XML.equals(arg)) {
|
||||||
outputXml = filename;
|
outputXml = argValue;
|
||||||
} else if (OPTION_OUTPUT_COMBINED.equals(arg)) {
|
} else if (OPTION_OUTPUT_COMBINED.equals(arg)) {
|
||||||
outputCombined = filename;
|
outputCombined = argValue;
|
||||||
|
} else if (OPTION_CODE_POINT_TABLE.equals(arg)) {
|
||||||
|
if (OPTION_CODE_POINT_TABLE_OFF.equals(argValue)) {
|
||||||
|
codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_OFF;
|
||||||
|
} else if (OPTION_CODE_POINT_TABLE_ON.equals(argValue)) {
|
||||||
|
codePointTableMode = Ver2DictEncoder.CODE_POINT_TABLE_ON;
|
||||||
|
} else {
|
||||||
|
throw new IllegalArgumentException(
|
||||||
|
"Unknown argument to -t option : " + argValue);
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException("Unknown option : " + arg);
|
throw new IllegalArgumentException("Unknown option : " + arg);
|
||||||
}
|
}
|
||||||
|
@ -225,6 +240,7 @@ public class DictionaryMaker {
|
||||||
mOutputXml = outputXml;
|
mOutputXml = outputXml;
|
||||||
mOutputCombined = outputCombined;
|
mOutputCombined = outputCombined;
|
||||||
mOutputBinaryFormatVersion = outputBinaryFormatVersion;
|
mOutputBinaryFormatVersion = outputBinaryFormatVersion;
|
||||||
|
mCodePointTableMode = codePointTableMode;
|
||||||
checkIntegrity();
|
checkIntegrity();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -335,7 +351,8 @@ public class DictionaryMaker {
|
||||||
throws FileNotFoundException, IOException, UnsupportedFormatException,
|
throws FileNotFoundException, IOException, UnsupportedFormatException,
|
||||||
IllegalArgumentException {
|
IllegalArgumentException {
|
||||||
if (null != args.mOutputBinary) {
|
if (null != args.mOutputBinary) {
|
||||||
writeBinaryDictionary(args.mOutputBinary, dict, args.mOutputBinaryFormatVersion);
|
writeBinaryDictionary(args.mOutputBinary, dict, args.mOutputBinaryFormatVersion,
|
||||||
|
args.mCodePointTableMode);
|
||||||
}
|
}
|
||||||
if (null != args.mOutputXml) {
|
if (null != args.mOutputXml) {
|
||||||
writeXmlDictionary(args.mOutputXml, dict);
|
writeXmlDictionary(args.mOutputXml, dict);
|
||||||
|
@ -351,19 +368,21 @@ public class DictionaryMaker {
|
||||||
* @param outputFilename the name of the file to write to.
|
* @param outputFilename the name of the file to write to.
|
||||||
* @param dict the dictionary to write.
|
* @param dict the dictionary to write.
|
||||||
* @param version the binary format version to use.
|
* @param version the binary format version to use.
|
||||||
|
* @param codePointTableMode the value to decide how we treat the code point table.
|
||||||
* @throws FileNotFoundException if the output file can't be created.
|
* @throws FileNotFoundException if the output file can't be created.
|
||||||
* @throws IOException if the output file can't be written to.
|
* @throws IOException if the output file can't be written to.
|
||||||
*/
|
*/
|
||||||
private static void writeBinaryDictionary(final String outputFilename,
|
private static void writeBinaryDictionary(final String outputFilename,
|
||||||
final FusionDictionary dict, final int version)
|
final FusionDictionary dict, final int version, final int codePointTableMode)
|
||||||
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
||||||
final File outputFile = new File(outputFilename);
|
final File outputFile = new File(outputFilename);
|
||||||
final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions(version);
|
final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions(version);
|
||||||
final DictEncoder dictEncoder;
|
final DictEncoder dictEncoder;
|
||||||
if (version == FormatSpec.VERSION4) {
|
if (version == FormatSpec.VERSION4) {
|
||||||
|
// VERSION4 doesn't use the code point table.
|
||||||
dictEncoder = new Ver4DictEncoder(outputFile);
|
dictEncoder = new Ver4DictEncoder(outputFile);
|
||||||
} else {
|
} else {
|
||||||
dictEncoder = new Ver2DictEncoder(outputFile);
|
dictEncoder = new Ver2DictEncoder(outputFile, codePointTableMode);
|
||||||
}
|
}
|
||||||
dictEncoder.writeDictionary(dict, formatOptions);
|
dictEncoder.writeDictionary(dict, formatOptions);
|
||||||
}
|
}
|
||||||
|
|
Loading…
Reference in a new issue