am dbb0c785
: Merge "Refactor dicttool with try-with-resource"
* commit 'dbb0c78522fbe138706bc86f98d1597b6daa9dad': Refactor dicttool with try-with-resource
This commit is contained in:
commit
0b5468a0db
8 changed files with 196 additions and 182 deletions
|
@ -26,11 +26,13 @@ import org.xml.sax.SAXException;
|
||||||
|
|
||||||
import java.io.BufferedInputStream;
|
import java.io.BufferedInputStream;
|
||||||
import java.io.BufferedOutputStream;
|
import java.io.BufferedOutputStream;
|
||||||
|
import java.io.BufferedReader;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
|
||||||
|
@ -51,14 +53,17 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
public final static String ENCRYPTION = "encrypted";
|
public final static String ENCRYPTION = "encrypted";
|
||||||
|
|
||||||
private final static int MAX_DECODE_DEPTH = 8;
|
private final static int MAX_DECODE_DEPTH = 8;
|
||||||
|
private final static int COPY_BUFFER_SIZE = 8192;
|
||||||
|
|
||||||
public static class DecoderChainSpec {
|
public static class DecoderChainSpec {
|
||||||
ArrayList<String> mDecoderSpec = new ArrayList<>();
|
ArrayList<String> mDecoderSpec = new ArrayList<>();
|
||||||
File mFile;
|
File mFile;
|
||||||
|
|
||||||
public DecoderChainSpec addStep(final String stepDescription) {
|
public DecoderChainSpec addStep(final String stepDescription) {
|
||||||
mDecoderSpec.add(stepDescription);
|
mDecoderSpec.add(stepDescription);
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
|
|
||||||
public String describeChain() {
|
public String describeChain() {
|
||||||
final StringBuilder s = new StringBuilder("raw");
|
final StringBuilder s = new StringBuilder("raw");
|
||||||
for (final String step : mDecoderSpec) {
|
for (final String step : mDecoderSpec) {
|
||||||
|
@ -70,13 +75,10 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void copy(final InputStream input, final OutputStream output) throws IOException {
|
public static void copy(final InputStream input, final OutputStream output) throws IOException {
|
||||||
final byte[] buffer = new byte[1000];
|
final byte[] buffer = new byte[COPY_BUFFER_SIZE];
|
||||||
final BufferedInputStream in = new BufferedInputStream(input);
|
for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
|
||||||
final BufferedOutputStream out = new BufferedOutputStream(output);
|
|
||||||
for (int readBytes = in.read(buffer); readBytes >= 0; readBytes = in.read(buffer))
|
|
||||||
output.write(buffer, 0, readBytes);
|
output.write(buffer, 0, readBytes);
|
||||||
in.close();
|
}
|
||||||
out.close();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -131,11 +133,15 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
try {
|
try {
|
||||||
final File dst = File.createTempFile(PREFIX, SUFFIX);
|
final File dst = File.createTempFile(PREFIX, SUFFIX);
|
||||||
dst.deleteOnExit();
|
dst.deleteOnExit();
|
||||||
final FileOutputStream dstStream = new FileOutputStream(dst);
|
try (
|
||||||
copy(Compress.getUncompressedStream(new BufferedInputStream(new FileInputStream(src))),
|
final InputStream input = Compress.getUncompressedStream(
|
||||||
new BufferedOutputStream(dstStream)); // #copy() closes the streams
|
new BufferedInputStream(new FileInputStream(src)));
|
||||||
|
final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
|
||||||
|
) {
|
||||||
|
copy(input, output);
|
||||||
return dst;
|
return dst;
|
||||||
} catch (IOException e) {
|
}
|
||||||
|
} catch (final IOException e) {
|
||||||
// Could not uncompress the file: presumably the file is simply not a compressed file
|
// Could not uncompress the file: presumably the file is simply not a compressed file
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -150,20 +156,20 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
try {
|
try {
|
||||||
final File dst = File.createTempFile(PREFIX, SUFFIX);
|
final File dst = File.createTempFile(PREFIX, SUFFIX);
|
||||||
dst.deleteOnExit();
|
dst.deleteOnExit();
|
||||||
final FileOutputStream dstStream = new FileOutputStream(dst);
|
try (
|
||||||
copy(Crypt.getDecryptedStream(new BufferedInputStream(new FileInputStream(src))),
|
final InputStream input = Crypt.getDecryptedStream(
|
||||||
dstStream); // #copy() closes the streams
|
new BufferedInputStream(new FileInputStream(src)));
|
||||||
|
final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
|
||||||
|
) {
|
||||||
|
copy(input, output);
|
||||||
return dst;
|
return dst;
|
||||||
} catch (IOException e) {
|
}
|
||||||
|
} catch (final IOException e) {
|
||||||
// Could not decrypt the file: presumably the file is simply not a crypted file
|
// Could not decrypt the file: presumably the file is simply not a crypted file
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void crash(final String filename, final Exception e) {
|
|
||||||
throw new RuntimeException("Can't read file " + filename, e);
|
|
||||||
}
|
|
||||||
|
|
||||||
static FusionDictionary getDictionary(final String filename, final boolean report) {
|
static FusionDictionary getDictionary(final String filename, final boolean report) {
|
||||||
final File file = new File(filename);
|
final File file = new File(filename);
|
||||||
if (report) {
|
if (report) {
|
||||||
|
@ -172,25 +178,28 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) {
|
if (XmlDictInputOutput.isXmlUnigramDictionary(filename)) {
|
||||||
if (report) System.out.println("Format : XML unigram list");
|
if (report) {
|
||||||
|
System.out.println("Format : XML unigram list");
|
||||||
|
}
|
||||||
return XmlDictInputOutput.readDictionaryXml(
|
return XmlDictInputOutput.readDictionaryXml(
|
||||||
new BufferedInputStream(new FileInputStream(file)),
|
new BufferedInputStream(new FileInputStream(file)),
|
||||||
null /* shortcuts */, null /* bigrams */);
|
null /* shortcuts */, null /* bigrams */);
|
||||||
} else {
|
}
|
||||||
final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
|
final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
|
||||||
if (null == decodedSpec) {
|
if (null == decodedSpec) {
|
||||||
crash(filename, new RuntimeException(
|
throw new RuntimeException("Does not seem to be a dictionary file " + filename);
|
||||||
filename + " does not seem to be a dictionary file"));
|
}
|
||||||
} else if (CombinedInputOutput.isCombinedDictionary(
|
if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mFile.getAbsolutePath())) {
|
||||||
decodedSpec.mFile.getAbsolutePath())){
|
|
||||||
if (report) {
|
if (report) {
|
||||||
System.out.println("Format : Combined format");
|
System.out.println("Format : Combined format");
|
||||||
System.out.println("Packaging : " + decodedSpec.describeChain());
|
System.out.println("Packaging : " + decodedSpec.describeChain());
|
||||||
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
|
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
|
||||||
}
|
}
|
||||||
return CombinedInputOutput.readDictionaryCombined(
|
try (final BufferedReader reader = new BufferedReader(
|
||||||
new BufferedInputStream(new FileInputStream(decodedSpec.mFile)));
|
new InputStreamReader(new FileInputStream(decodedSpec.mFile), "UTF-8"))) {
|
||||||
} else {
|
return CombinedInputOutput.readDictionaryCombined(reader);
|
||||||
|
}
|
||||||
|
}
|
||||||
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(
|
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(
|
||||||
decodedSpec.mFile, 0, decodedSpec.mFile.length(),
|
decodedSpec.mFile, 0, decodedSpec.mFile.length(),
|
||||||
DictDecoder.USE_BYTEARRAY);
|
DictDecoder.USE_BYTEARRAY);
|
||||||
|
@ -200,17 +209,9 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
|
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
|
||||||
}
|
}
|
||||||
return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
|
return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
|
||||||
|
} catch (final IOException | SAXException | ParserConfigurationException |
|
||||||
|
UnsupportedFormatException e) {
|
||||||
|
throw new RuntimeException("Can't read file " + filename, e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
|
||||||
crash(filename, e);
|
|
||||||
} catch (SAXException e) {
|
|
||||||
crash(filename, e);
|
|
||||||
} catch (ParserConfigurationException e) {
|
|
||||||
crash(filename, e);
|
|
||||||
} catch (UnsupportedFormatException e) {
|
|
||||||
crash(filename, e);
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -26,13 +26,9 @@ import com.android.inputmethod.latin.makedict.WordProperty;
|
||||||
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
|
import com.android.inputmethod.latin.utils.CombinedFormatUtils;
|
||||||
|
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.File;
|
import java.io.BufferedWriter;
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.FileReader;
|
import java.io.FileReader;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
|
||||||
import java.io.InputStreamReader;
|
|
||||||
import java.io.Writer;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
@ -57,27 +53,15 @@ public class CombinedInputOutput {
|
||||||
* @return true if the file is in the combined format, false otherwise
|
* @return true if the file is in the combined format, false otherwise
|
||||||
*/
|
*/
|
||||||
public static boolean isCombinedDictionary(final String filename) {
|
public static boolean isCombinedDictionary(final String filename) {
|
||||||
BufferedReader reader = null;
|
try (final BufferedReader reader = new BufferedReader(new FileReader(filename))) {
|
||||||
try {
|
|
||||||
reader = new BufferedReader(new FileReader(new File(filename)));
|
|
||||||
String firstLine = reader.readLine();
|
String firstLine = reader.readLine();
|
||||||
while (firstLine.startsWith(COMMENT_LINE_STARTER)) {
|
while (firstLine.startsWith(COMMENT_LINE_STARTER)) {
|
||||||
firstLine = reader.readLine();
|
firstLine = reader.readLine();
|
||||||
}
|
}
|
||||||
return firstLine.matches(
|
return firstLine.matches(
|
||||||
"^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
|
"^" + CombinedFormatUtils.DICTIONARY_TAG + "=[^:]+(:[^=]+=[^:]+)*");
|
||||||
} catch (FileNotFoundException e) {
|
} catch (final IOException e) {
|
||||||
return false;
|
return false;
|
||||||
} catch (IOException e) {
|
|
||||||
return false;
|
|
||||||
} finally {
|
|
||||||
if (reader != null) {
|
|
||||||
try {
|
|
||||||
reader.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
// do nothing
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -87,12 +71,11 @@ public class CombinedInputOutput {
|
||||||
* This is the public method that will read a combined file and return the corresponding memory
|
* This is the public method that will read a combined file and return the corresponding memory
|
||||||
* representation.
|
* representation.
|
||||||
*
|
*
|
||||||
* @param source the file to read the data from.
|
* @param reader the buffered reader to read the data from.
|
||||||
* @return the in-memory representation of the dictionary.
|
* @return the in-memory representation of the dictionary.
|
||||||
*/
|
*/
|
||||||
public static FusionDictionary readDictionaryCombined(final InputStream source)
|
public static FusionDictionary readDictionaryCombined(final BufferedReader reader)
|
||||||
throws IOException {
|
throws IOException {
|
||||||
final BufferedReader reader = new BufferedReader(new InputStreamReader(source, "UTF-8"));
|
|
||||||
String headerLine = reader.readLine();
|
String headerLine = reader.readLine();
|
||||||
while (headerLine.startsWith(COMMENT_LINE_STARTER)) {
|
while (headerLine.startsWith(COMMENT_LINE_STARTER)) {
|
||||||
headerLine = reader.readLine();
|
headerLine = reader.readLine();
|
||||||
|
@ -218,11 +201,11 @@ public class CombinedInputOutput {
|
||||||
/**
|
/**
|
||||||
* Writes a dictionary to a combined file.
|
* Writes a dictionary to a combined file.
|
||||||
*
|
*
|
||||||
* @param destination a destination stream to write to.
|
* @param destination a destination writer.
|
||||||
* @param dict the dictionary to write.
|
* @param dict the dictionary to write.
|
||||||
*/
|
*/
|
||||||
public static void writeDictionaryCombined(
|
public static void writeDictionaryCombined(final BufferedWriter destination,
|
||||||
final Writer destination, final FusionDictionary dict) throws IOException {
|
final FusionDictionary dict) throws IOException {
|
||||||
final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<>();
|
final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<>();
|
||||||
for (final WordProperty wordProperty : dict) {
|
for (final WordProperty wordProperty : dict) {
|
||||||
// This for ordering by frequency, then by asciibetic order
|
// This for ordering by frequency, then by asciibetic order
|
||||||
|
@ -232,6 +215,5 @@ public class CombinedInputOutput {
|
||||||
for (final WordProperty wordProperty : wordPropertiesInDict) {
|
for (final WordProperty wordProperty : wordPropertiesInDict) {
|
||||||
destination.write(CombinedFormatUtils.formatWordProperty(wordProperty));
|
destination.write(CombinedFormatUtils.formatWordProperty(wordProperty));
|
||||||
}
|
}
|
||||||
destination.close();
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,11 +16,6 @@
|
||||||
|
|
||||||
package com.android.inputmethod.latin.dicttool;
|
package com.android.inputmethod.latin.dicttool;
|
||||||
|
|
||||||
import java.io.BufferedInputStream;
|
|
||||||
import java.io.BufferedOutputStream;
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
|
@ -32,8 +27,7 @@ public class Compress {
|
||||||
// This container class is not publicly instantiable.
|
// This container class is not publicly instantiable.
|
||||||
}
|
}
|
||||||
|
|
||||||
public static OutputStream getCompressedStream(final OutputStream out)
|
public static OutputStream getCompressedStream(final OutputStream out) throws IOException {
|
||||||
throws java.io.IOException {
|
|
||||||
return new GZIPOutputStream(out);
|
return new GZIPOutputStream(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -43,7 +37,6 @@ public class Compress {
|
||||||
|
|
||||||
static public class Compressor extends Dicttool.Command {
|
static public class Compressor extends Dicttool.Command {
|
||||||
public static final String COMMAND = "compress";
|
public static final String COMMAND = "compress";
|
||||||
public static final String STDIN_OR_STDOUT = "-";
|
|
||||||
|
|
||||||
public Compressor() {
|
public Compressor() {
|
||||||
}
|
}
|
||||||
|
@ -61,17 +54,18 @@ public class Compress {
|
||||||
}
|
}
|
||||||
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
|
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
|
||||||
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
|
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
|
||||||
final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
|
try (
|
||||||
: new BufferedInputStream(new FileInputStream(new File(inFilename)));
|
final InputStream input = getFileInputStreamOrStdIn(inFilename);
|
||||||
final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
|
final OutputStream compressedOutput = getCompressedStream(
|
||||||
: new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
|
getFileOutputStreamOrStdOut(outFilename))
|
||||||
BinaryDictOffdeviceUtils.copy(input, new GZIPOutputStream(output));
|
) {
|
||||||
|
BinaryDictOffdeviceUtils.copy(input, compressedOutput);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static public class Uncompressor extends Dicttool.Command {
|
static public class Uncompressor extends Dicttool.Command {
|
||||||
public static final String COMMAND = "uncompress";
|
public static final String COMMAND = "uncompress";
|
||||||
public static final String STDIN_OR_STDOUT = "-";
|
|
||||||
|
|
||||||
public Uncompressor() {
|
public Uncompressor() {
|
||||||
}
|
}
|
||||||
|
@ -89,11 +83,13 @@ public class Compress {
|
||||||
}
|
}
|
||||||
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
|
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
|
||||||
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
|
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
|
||||||
final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
|
try (
|
||||||
: new BufferedInputStream(new FileInputStream(new File(inFilename)));
|
final InputStream uncompressedInput = getUncompressedStream(
|
||||||
final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
|
getFileInputStreamOrStdIn(inFilename));
|
||||||
: new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
|
final OutputStream output = getFileOutputStreamOrStdOut(outFilename)
|
||||||
BinaryDictOffdeviceUtils.copy(new GZIPInputStream(input), output);
|
) {
|
||||||
|
BinaryDictOffdeviceUtils.copy(uncompressedInput, output);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -27,19 +27,23 @@ import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||||
import com.android.inputmethod.latin.makedict.Ver2DictEncoder;
|
import com.android.inputmethod.latin.makedict.Ver2DictEncoder;
|
||||||
import com.android.inputmethod.latin.makedict.Ver4DictEncoder;
|
import com.android.inputmethod.latin.makedict.Ver4DictEncoder;
|
||||||
|
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
|
import java.io.BufferedReader;
|
||||||
import java.io.BufferedWriter;
|
import java.io.BufferedWriter;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.FileWriter;
|
import java.io.FileWriter;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.InputStreamReader;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.LinkedList;
|
import java.util.LinkedList;
|
||||||
|
|
||||||
import javax.xml.parsers.ParserConfigurationException;
|
import javax.xml.parsers.ParserConfigurationException;
|
||||||
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Main class/method for DictionaryMaker.
|
* Main class/method for DictionaryMaker.
|
||||||
*/
|
*/
|
||||||
|
@ -279,20 +283,19 @@ public class DictionaryMaker {
|
||||||
*/
|
*/
|
||||||
private static FusionDictionary readCombinedFile(final String combinedFilename)
|
private static FusionDictionary readCombinedFile(final String combinedFilename)
|
||||||
throws FileNotFoundException, IOException {
|
throws FileNotFoundException, IOException {
|
||||||
FileInputStream inStream = null;
|
try (final BufferedReader reader = new BufferedReader(new InputStreamReader(
|
||||||
try {
|
new FileInputStream(combinedFilename), "UTF-8"))
|
||||||
final File file = new File(combinedFilename);
|
) {
|
||||||
inStream = new FileInputStream(file);
|
return CombinedInputOutput.readDictionaryCombined(reader);
|
||||||
return CombinedInputOutput.readDictionaryCombined(inStream);
|
|
||||||
} finally {
|
|
||||||
if (null != inStream) {
|
|
||||||
try {
|
|
||||||
inStream.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
// do nothing
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static BufferedInputStream getBufferedFileInputStream(final String filename)
|
||||||
|
throws FileNotFoundException {
|
||||||
|
if (filename == null) {
|
||||||
|
return null;
|
||||||
}
|
}
|
||||||
|
return new BufferedInputStream(new FileInputStream(filename));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -310,13 +313,14 @@ public class DictionaryMaker {
|
||||||
private static FusionDictionary readXmlFile(final String unigramXmlFilename,
|
private static FusionDictionary readXmlFile(final String unigramXmlFilename,
|
||||||
final String shortcutXmlFilename, final String bigramXmlFilename)
|
final String shortcutXmlFilename, final String bigramXmlFilename)
|
||||||
throws FileNotFoundException, SAXException, IOException, ParserConfigurationException {
|
throws FileNotFoundException, SAXException, IOException, ParserConfigurationException {
|
||||||
final FileInputStream unigrams = new FileInputStream(new File(unigramXmlFilename));
|
try (
|
||||||
final FileInputStream shortcuts = null == shortcutXmlFilename ? null :
|
final BufferedInputStream unigrams = getBufferedFileInputStream(unigramXmlFilename);
|
||||||
new FileInputStream(new File(shortcutXmlFilename));
|
final BufferedInputStream shortcuts = getBufferedFileInputStream(shortcutXmlFilename);
|
||||||
final FileInputStream bigrams = null == bigramXmlFilename ? null :
|
final BufferedInputStream bigrams = getBufferedFileInputStream(bigramXmlFilename);
|
||||||
new FileInputStream(new File(bigramXmlFilename));
|
) {
|
||||||
return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams);
|
return XmlDictInputOutput.readDictionaryXml(unigrams, shortcuts, bigrams);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Invoke the right output method according to args.
|
* Invoke the right output method according to args.
|
||||||
|
@ -374,8 +378,9 @@ public class DictionaryMaker {
|
||||||
*/
|
*/
|
||||||
private static void writeXmlDictionary(final String outputFilename,
|
private static void writeXmlDictionary(final String outputFilename,
|
||||||
final FusionDictionary dict) throws FileNotFoundException, IOException {
|
final FusionDictionary dict) throws FileNotFoundException, IOException {
|
||||||
XmlDictInputOutput.writeDictionaryXml(new BufferedWriter(new FileWriter(outputFilename)),
|
try (final BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename))) {
|
||||||
dict);
|
XmlDictInputOutput.writeDictionaryXml(writer, dict);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -388,7 +393,8 @@ public class DictionaryMaker {
|
||||||
*/
|
*/
|
||||||
private static void writeCombinedDictionary(final String outputFilename,
|
private static void writeCombinedDictionary(final String outputFilename,
|
||||||
final FusionDictionary dict) throws FileNotFoundException, IOException {
|
final FusionDictionary dict) throws FileNotFoundException, IOException {
|
||||||
CombinedInputOutput.writeDictionaryCombined(
|
try (final BufferedWriter writer = new BufferedWriter(new FileWriter(outputFilename))) {
|
||||||
new BufferedWriter(new FileWriter(outputFilename)), dict);
|
CombinedInputOutput.writeDictionaryCombined(writer, dict);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,23 +16,63 @@
|
||||||
|
|
||||||
package com.android.inputmethod.latin.dicttool;
|
package com.android.inputmethod.latin.dicttool;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
|
import java.io.BufferedOutputStream;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
|
|
||||||
public class Dicttool {
|
public class Dicttool {
|
||||||
|
|
||||||
public static abstract class Command {
|
public static abstract class Command {
|
||||||
|
public static final String STDIN_OR_STDOUT = "-";
|
||||||
protected String[] mArgs;
|
protected String[] mArgs;
|
||||||
|
|
||||||
public void setArgs(String[] args) throws IllegalArgumentException {
|
public void setArgs(String[] args) throws IllegalArgumentException {
|
||||||
mArgs = args;
|
mArgs = args;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
protected static InputStream getFileInputStreamOrStdIn(final String inFilename)
|
||||||
|
throws FileNotFoundException {
|
||||||
|
if (STDIN_OR_STDOUT.equals(inFilename)) {
|
||||||
|
return System.in;
|
||||||
|
}
|
||||||
|
return getFileInputStream(new File(inFilename));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static InputStream getFileInputStream(final File inFile)
|
||||||
|
throws FileNotFoundException {
|
||||||
|
return new BufferedInputStream(new FileInputStream(inFile));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static OutputStream getFileOutputStreamOrStdOut(final String outFilename)
|
||||||
|
throws FileNotFoundException {
|
||||||
|
if (STDIN_OR_STDOUT.equals(outFilename)) {
|
||||||
|
return System.out;
|
||||||
|
}
|
||||||
|
return getFileOutputStream(new File(outFilename));
|
||||||
|
}
|
||||||
|
|
||||||
|
protected static OutputStream getFileOutputStream(final File outFile)
|
||||||
|
throws FileNotFoundException {
|
||||||
|
return new BufferedOutputStream(new FileOutputStream(outFile));
|
||||||
|
}
|
||||||
|
|
||||||
abstract public String getHelp();
|
abstract public String getHelp();
|
||||||
abstract public void run() throws Exception;
|
abstract public void run() throws Exception;
|
||||||
}
|
}
|
||||||
|
|
||||||
static HashMap<String, Class<? extends Command>> sCommands = new HashMap<>();
|
static HashMap<String, Class<? extends Command>> sCommands = new HashMap<>();
|
||||||
|
|
||||||
static {
|
static {
|
||||||
CommandList.populate();
|
CommandList.populate();
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void addCommand(final String commandName, final Class<? extends Command> cls) {
|
public static void addCommand(final String commandName, final Class<? extends Command> cls) {
|
||||||
sCommands.put(commandName, cls);
|
sCommands.put(commandName, cls);
|
||||||
}
|
}
|
||||||
|
@ -60,7 +100,7 @@ public class Dicttool {
|
||||||
return sCommands.containsKey(commandName);
|
return sCommands.containsKey(commandName);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Command getCommand(final String[] arguments) {
|
private static Command getCommand(final String[] arguments) {
|
||||||
final String commandName = arguments[0];
|
final String commandName = arguments[0];
|
||||||
if (!isCommand(commandName)) {
|
if (!isCommand(commandName)) {
|
||||||
throw new RuntimeException("Unknown command : " + commandName);
|
throw new RuntimeException("Unknown command : " + commandName);
|
||||||
|
@ -76,7 +116,7 @@ public class Dicttool {
|
||||||
* @param arguments the arguments passed to dicttool.
|
* @param arguments the arguments passed to dicttool.
|
||||||
* @return 0 for success, an error code otherwise (always 1 at the moment)
|
* @return 0 for success, an error code otherwise (always 1 at the moment)
|
||||||
*/
|
*/
|
||||||
private int execute(final String[] arguments) {
|
private static int execute(final String[] arguments) {
|
||||||
final Command command = getCommand(arguments);
|
final Command command = getCommand(arguments);
|
||||||
try {
|
try {
|
||||||
command.run();
|
command.run();
|
||||||
|
@ -95,6 +135,6 @@ public class Dicttool {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// Exit with the success/error code from #execute() as status.
|
// Exit with the success/error code from #execute() as status.
|
||||||
System.exit(new Dicttool().execute(arguments));
|
System.exit(execute(arguments));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -21,8 +21,9 @@ import java.io.BufferedOutputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.FileOutputStream;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.InputStream;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
|
||||||
public class Package {
|
public class Package {
|
||||||
private Package() {
|
private Package() {
|
||||||
|
@ -86,9 +87,13 @@ public class Package {
|
||||||
}
|
}
|
||||||
System.out.println("Packaging : " + decodedSpec.describeChain());
|
System.out.println("Packaging : " + decodedSpec.describeChain());
|
||||||
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
|
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
|
||||||
final FileOutputStream dstStream = new FileOutputStream(new File(mArgs[1]));
|
try (
|
||||||
BinaryDictOffdeviceUtils.copy(new BufferedInputStream(
|
final InputStream input = getFileInputStream(decodedSpec.mFile);
|
||||||
new FileInputStream(decodedSpec.mFile)), new BufferedOutputStream(dstStream));
|
final OutputStream output = new BufferedOutputStream(
|
||||||
|
getFileOutputStreamOrStdOut(mArgs[1]))
|
||||||
|
) {
|
||||||
|
BinaryDictOffdeviceUtils.copy(input, output);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -23,13 +23,16 @@ import com.android.inputmethod.latin.makedict.ProbabilityInfo;
|
||||||
import com.android.inputmethod.latin.makedict.WeightedString;
|
import com.android.inputmethod.latin.makedict.WeightedString;
|
||||||
import com.android.inputmethod.latin.makedict.WordProperty;
|
import com.android.inputmethod.latin.makedict.WordProperty;
|
||||||
|
|
||||||
|
import org.xml.sax.Attributes;
|
||||||
|
import org.xml.sax.SAXException;
|
||||||
|
import org.xml.sax.helpers.DefaultHandler;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.File;
|
import java.io.BufferedWriter;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileInputStream;
|
||||||
import java.io.FileReader;
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStreamReader;
|
||||||
import java.io.Writer;
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.TreeSet;
|
import java.util.TreeSet;
|
||||||
|
@ -38,10 +41,6 @@ import javax.xml.parsers.ParserConfigurationException;
|
||||||
import javax.xml.parsers.SAXParser;
|
import javax.xml.parsers.SAXParser;
|
||||||
import javax.xml.parsers.SAXParserFactory;
|
import javax.xml.parsers.SAXParserFactory;
|
||||||
|
|
||||||
import org.xml.sax.Attributes;
|
|
||||||
import org.xml.sax.SAXException;
|
|
||||||
import org.xml.sax.helpers.DefaultHandler;
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads and writes XML files for a FusionDictionary.
|
* Reads and writes XML files for a FusionDictionary.
|
||||||
*
|
*
|
||||||
|
@ -57,8 +56,6 @@ public class XmlDictInputOutput {
|
||||||
private static final String WORD_ATTR = "word";
|
private static final String WORD_ATTR = "word";
|
||||||
private static final String NOT_A_WORD_ATTR = "not_a_word";
|
private static final String NOT_A_WORD_ATTR = "not_a_word";
|
||||||
|
|
||||||
private static final String OPTIONS_KEY = "options";
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* SAX handler for a unigram XML file.
|
* SAX handler for a unigram XML file.
|
||||||
*/
|
*/
|
||||||
|
@ -120,7 +117,6 @@ public class XmlDictInputOutput {
|
||||||
final String attrName = attrs.getLocalName(attrIndex);
|
final String attrName = attrs.getLocalName(attrIndex);
|
||||||
attributes.put(attrName, attrs.getValue(attrIndex));
|
attributes.put(attrName, attrs.getValue(attrIndex));
|
||||||
}
|
}
|
||||||
final String optionsString = attributes.get(OPTIONS_KEY);
|
|
||||||
mDictionary = new FusionDictionary(new PtNodeArray(),
|
mDictionary = new FusionDictionary(new PtNodeArray(),
|
||||||
new DictionaryOptions(attributes));
|
new DictionaryOptions(attributes));
|
||||||
} else {
|
} else {
|
||||||
|
@ -244,7 +240,7 @@ public class XmlDictInputOutput {
|
||||||
protected int getValueFromFreqString(final String freqString) {
|
protected int getValueFromFreqString(final String freqString) {
|
||||||
if (WHITELIST_MARKER.equals(freqString)) {
|
if (WHITELIST_MARKER.equals(freqString)) {
|
||||||
return WHITELIST_FREQ_VALUE;
|
return WHITELIST_FREQ_VALUE;
|
||||||
} else {
|
}
|
||||||
final int intValue = super.getValueFromFreqString(freqString);
|
final int intValue = super.getValueFromFreqString(freqString);
|
||||||
if (intValue < MIN_FREQ || intValue > MAX_FREQ) {
|
if (intValue < MIN_FREQ || intValue > MAX_FREQ) {
|
||||||
throw new RuntimeException("Shortcut freq out of range. Accepted range is "
|
throw new RuntimeException("Shortcut freq out of range. Accepted range is "
|
||||||
|
@ -252,7 +248,6 @@ public class XmlDictInputOutput {
|
||||||
}
|
}
|
||||||
return intValue;
|
return intValue;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// As per getAssocMap(), this never returns null.
|
// As per getAssocMap(), this never returns null.
|
||||||
public HashMap<String, ArrayList<WeightedString>> getShortcutAndWhitelistMap() {
|
public HashMap<String, ArrayList<WeightedString>> getShortcutAndWhitelistMap() {
|
||||||
|
@ -269,23 +264,12 @@ public class XmlDictInputOutput {
|
||||||
* @return true if the file is in the unigram XML format, false otherwise
|
* @return true if the file is in the unigram XML format, false otherwise
|
||||||
*/
|
*/
|
||||||
public static boolean isXmlUnigramDictionary(final String filename) {
|
public static boolean isXmlUnigramDictionary(final String filename) {
|
||||||
BufferedReader reader = null;
|
try (final BufferedReader reader = new BufferedReader(
|
||||||
try {
|
new InputStreamReader(new FileInputStream(filename), "UTF-8"))) {
|
||||||
reader = new BufferedReader(new FileReader(new File(filename)));
|
|
||||||
final String firstLine = reader.readLine();
|
final String firstLine = reader.readLine();
|
||||||
return firstLine.matches("^\\s*<wordlist .*>\\s*$");
|
return firstLine.matches("^\\s*<wordlist .*>\\s*$");
|
||||||
} catch (FileNotFoundException e) {
|
} catch (final IOException e) {
|
||||||
return false;
|
return false;
|
||||||
} catch (IOException e) {
|
|
||||||
return false;
|
|
||||||
} finally {
|
|
||||||
if (reader != null) {
|
|
||||||
try {
|
|
||||||
reader.close();
|
|
||||||
} catch (IOException e) {
|
|
||||||
// do nothing
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -300,8 +284,8 @@ public class XmlDictInputOutput {
|
||||||
* @param bigrams the file to read the bigrams from, or null.
|
* @param bigrams the file to read the bigrams from, or null.
|
||||||
* @return the in-memory representation of the dictionary.
|
* @return the in-memory representation of the dictionary.
|
||||||
*/
|
*/
|
||||||
public static FusionDictionary readDictionaryXml(final InputStream unigrams,
|
public static FusionDictionary readDictionaryXml(final BufferedInputStream unigrams,
|
||||||
final InputStream shortcuts, final InputStream bigrams)
|
final BufferedInputStream shortcuts, final BufferedInputStream bigrams)
|
||||||
throws SAXException, IOException, ParserConfigurationException {
|
throws SAXException, IOException, ParserConfigurationException {
|
||||||
final SAXParserFactory factory = SAXParserFactory.newInstance();
|
final SAXParserFactory factory = SAXParserFactory.newInstance();
|
||||||
factory.setNamespaceAware(true);
|
factory.setNamespaceAware(true);
|
||||||
|
@ -350,8 +334,8 @@ public class XmlDictInputOutput {
|
||||||
* @param destination a destination stream to write to.
|
* @param destination a destination stream to write to.
|
||||||
* @param dict the dictionary to write.
|
* @param dict the dictionary to write.
|
||||||
*/
|
*/
|
||||||
public static void writeDictionaryXml(Writer destination, FusionDictionary dict)
|
public static void writeDictionaryXml(final BufferedWriter destination,
|
||||||
throws IOException {
|
final FusionDictionary dict) throws IOException {
|
||||||
final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<>();
|
final TreeSet<WordProperty> wordPropertiesInDict = new TreeSet<>();
|
||||||
for (WordProperty wordProperty : dict) {
|
for (WordProperty wordProperty : dict) {
|
||||||
wordPropertiesInDict.add(wordProperty);
|
wordPropertiesInDict.add(wordProperty);
|
||||||
|
|
|
@ -62,13 +62,13 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
||||||
|
|
||||||
final File dst = File.createTempFile("testGetRawDict", ".tmp");
|
final File dst = File.createTempFile("testGetRawDict", ".tmp");
|
||||||
dst.deleteOnExit();
|
dst.deleteOnExit();
|
||||||
|
try (final OutputStream out = Compress.getCompressedStream(
|
||||||
final OutputStream out = Compress.getCompressedStream(
|
|
||||||
Compress.getCompressedStream(
|
Compress.getCompressedStream(
|
||||||
Compress.getCompressedStream(
|
Compress.getCompressedStream(
|
||||||
new BufferedOutputStream(new FileOutputStream(dst)))));
|
new BufferedOutputStream(new FileOutputStream(dst)))))) {
|
||||||
final DictEncoder dictEncoder = new Ver2DictEncoder(out);
|
final DictEncoder dictEncoder = new Ver2DictEncoder(out);
|
||||||
dictEncoder.writeDictionary(dict, new FormatOptions(2, false));
|
dictEncoder.writeDictionary(dict, new FormatOptions(2, false));
|
||||||
|
}
|
||||||
|
|
||||||
// Test for an actually compressed dictionary and its contents
|
// Test for an actually compressed dictionary and its contents
|
||||||
final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
|
final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
|
||||||
|
@ -96,11 +96,11 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
||||||
// Randomly create some 4k file containing garbage
|
// Randomly create some 4k file containing garbage
|
||||||
final File dst = File.createTempFile("testGetRawDict", ".tmp");
|
final File dst = File.createTempFile("testGetRawDict", ".tmp");
|
||||||
dst.deleteOnExit();
|
dst.deleteOnExit();
|
||||||
final OutputStream out = new BufferedOutputStream(new FileOutputStream(dst));
|
try (final OutputStream out = new BufferedOutputStream(new FileOutputStream(dst))) {
|
||||||
for (int i = 0; i < 1024; ++i) {
|
for (int i = 0; i < 1024; ++i) {
|
||||||
out.write(0x12345678);
|
out.write(0x12345678);
|
||||||
}
|
}
|
||||||
out.close();
|
}
|
||||||
|
|
||||||
// Test that a random data file actually fails
|
// Test that a random data file actually fails
|
||||||
assertNull("Wrongly identified data file",
|
assertNull("Wrongly identified data file",
|
||||||
|
@ -108,12 +108,12 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
||||||
|
|
||||||
final File gzDst = File.createTempFile("testGetRawDict", ".tmp");
|
final File gzDst = File.createTempFile("testGetRawDict", ".tmp");
|
||||||
gzDst.deleteOnExit();
|
gzDst.deleteOnExit();
|
||||||
final OutputStream gzOut =
|
try (final OutputStream gzOut = Compress.getCompressedStream(
|
||||||
Compress.getCompressedStream(new BufferedOutputStream(new FileOutputStream(gzDst)));
|
new BufferedOutputStream(new FileOutputStream(gzDst)))) {
|
||||||
for (int i = 0; i < 1024; ++i) {
|
for (int i = 0; i < 1024; ++i) {
|
||||||
gzOut.write(0x12345678);
|
gzOut.write(0x12345678);
|
||||||
}
|
}
|
||||||
gzOut.close();
|
}
|
||||||
|
|
||||||
// Test that a compressed random data file actually fails
|
// Test that a compressed random data file actually fails
|
||||||
assertNull("Wrongly identified data file",
|
assertNull("Wrongly identified data file",
|
||||||
|
|
Loading…
Reference in a new issue