am 5564317f: Genericize getting a raw dictionary
* commit '5564317f837164c67749423fa78f917ed2ae9e14': Genericize getting a raw dictionary
This commit is contained in:
commit ce34cb9b83
3 changed files with 71 additions and 48 deletions
|
@ -35,6 +35,9 @@ import java.io.InputStreamReader;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
|
|
||||||
|
import javax.annotation.Nonnull;
|
||||||
|
import javax.annotation.Nullable;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class grouping utilities for offline dictionary making.
|
* Class grouping utilities for offline dictionary making.
|
||||||
*
|
*
|
||||||
|
@ -45,10 +48,9 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
// Prefix and suffix are arbitrary, the values do not really matter
|
// Prefix and suffix are arbitrary, the values do not really matter
|
||||||
private final static String PREFIX = "dicttool";
|
private final static String PREFIX = "dicttool";
|
||||||
private final static String SUFFIX = ".tmp";
|
private final static String SUFFIX = ".tmp";
|
||||||
|
|
||||||
private final static int COPY_BUFFER_SIZE = 8192;
|
private final static int COPY_BUFFER_SIZE = 8192;
|
||||||
|
|
||||||
public static class DecoderChainSpec {
|
public static class DecoderChainSpec<T> {
|
||||||
public final static int COMPRESSION = 1;
|
public final static int COMPRESSION = 1;
|
||||||
public final static int ENCRYPTION = 2;
|
public final static int ENCRYPTION = 2;
|
||||||
|
|
||||||
|
@ -57,16 +59,16 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
};
|
};
|
||||||
|
|
||||||
private final int mDecoderSpecIndex;
|
private final int mDecoderSpecIndex;
|
||||||
File mFile;
|
T mResult;
|
||||||
|
|
||||||
public DecoderChainSpec() {
|
public DecoderChainSpec() {
|
||||||
mDecoderSpecIndex = 0;
|
mDecoderSpecIndex = 0;
|
||||||
mFile = null;
|
mResult = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
private DecoderChainSpec(final DecoderChainSpec src) {
|
private DecoderChainSpec(final DecoderChainSpec<T> src) {
|
||||||
mDecoderSpecIndex = src.mDecoderSpecIndex + 1;
|
mDecoderSpecIndex = src.mDecoderSpecIndex + 1;
|
||||||
mFile = src.mFile;
|
mResult = src.mResult;
|
||||||
}
|
}
|
||||||
|
|
||||||
private String getStepDescription(final int step) {
|
private String getStepDescription(final int step) {
|
||||||
|
@ -115,6 +117,31 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public interface InputProcessor<T> {
|
||||||
|
@Nonnull
|
||||||
|
public T process(@Nonnull final InputStream input)
|
||||||
|
throws IOException, UnsupportedFormatException;
|
||||||
|
}
|
||||||
|
|
||||||
|
public static class CopyProcessor implements InputProcessor<File> {
|
||||||
|
@Override @Nonnull
|
||||||
|
public File process(@Nonnull final InputStream input) throws IOException,
|
||||||
|
UnsupportedFormatException {
|
||||||
|
final File dst = File.createTempFile(PREFIX, SUFFIX);
|
||||||
|
dst.deleteOnExit();
|
||||||
|
try (final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))) {
|
||||||
|
copy(input, output);
|
||||||
|
output.flush();
|
||||||
|
output.close();
|
||||||
|
if (BinaryDictDecoderUtils.isBinaryDictionary(dst)
|
||||||
|
|| CombinedInputOutput.isCombinedDictionary(dst.getAbsolutePath())) {
|
||||||
|
return dst;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
throw new UnsupportedFormatException("Input stream not at the expected format");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static void copy(final InputStream input, final OutputStream output) throws IOException {
|
public static void copy(final InputStream input, final OutputStream output) throws IOException {
|
||||||
final byte[] buffer = new byte[COPY_BUFFER_SIZE];
|
final byte[] buffer = new byte[COPY_BUFFER_SIZE];
|
||||||
for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
|
for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
|
||||||
|
@ -123,46 +150,42 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Returns a decrypted/uncompressed dictionary.
|
* Process a dictionary, decrypting/uncompressing it on the fly as necessary.
|
||||||
*
|
*
|
||||||
* This will decrypt/uncompress any number of times as necessary until it finds the
|
* This will execute the given processor repeatedly with the possible alternatives
|
||||||
* dictionary signature, and copy the decoded file to a temporary place.
|
* for dictionary format until the processor does not throw an exception.
|
||||||
* If this is not a dictionary, the method returns null.
|
* If the processor succeeds for none of the possible formats, the method returns null.
|
||||||
*/
|
*/
|
||||||
public static DecoderChainSpec getRawDictionaryOrNull(final File src) {
|
@Nullable
|
||||||
DecoderChainSpec spec = new DecoderChainSpec();
|
public static <T> DecoderChainSpec<T> decodeDictionaryForProcess(@Nonnull final File src,
|
||||||
if (BinaryDictDecoderUtils.isBinaryDictionary(src)
|
@Nonnull final InputProcessor<T> processor) {
|
||||||
|| CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) {
|
@Nonnull DecoderChainSpec spec = new DecoderChainSpec();
|
||||||
spec.mFile = src;
|
|
||||||
return spec;
|
|
||||||
}
|
|
||||||
while (null != spec) {
|
while (null != spec) {
|
||||||
try {
|
try {
|
||||||
final File dst = File.createTempFile(PREFIX, SUFFIX);
|
try (final InputStream input = spec.getStream(src)) {
|
||||||
dst.deleteOnExit();
|
spec.mResult = processor.process(input);
|
||||||
try (final InputStream input = spec.getStream(src);
|
return spec;
|
||||||
final OutputStream output =
|
}
|
||||||
new BufferedOutputStream(new FileOutputStream(dst))) {
|
} catch (IOException | UnsupportedFormatException e) {
|
||||||
copy(input, output);
|
// If the format is not the right one for this file, the processor will throw one
|
||||||
output.flush();
|
// of these exceptions. In our case, that means we should try the next spec,
|
||||||
output.close();
|
// since it may still be at another format we haven't tried yet.
|
||||||
if (BinaryDictDecoderUtils.isBinaryDictionary(dst)
|
// TODO: stop using exceptions for this non-exceptional case.
|
||||||
|| CombinedInputOutput.isCombinedDictionary(
|
|
||||||
dst.getAbsolutePath())) {
|
|
||||||
spec.mFile = dst;
|
|
||||||
return spec;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} catch (IOException e) {
|
|
||||||
// This was not the right format, fall through and try the next
|
|
||||||
System.out.println("Rejecting " + spec.describeChain() + " : " + e);
|
|
||||||
System.out.println(e.getStackTrace()[0].toString());
|
|
||||||
}
|
}
|
||||||
spec = spec.next();
|
spec = spec.next();
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Get a decoder chain spec with a raw dictionary file. This makes a new file on the
|
||||||
|
* disk ready for any treatment the client wants.
|
||||||
|
*/
|
||||||
|
@Nullable
|
||||||
|
public static DecoderChainSpec<File> getRawDictionaryOrNull(@Nonnull final File src) {
|
||||||
|
return decodeDictionaryForProcess(src, new CopyProcessor());
|
||||||
|
}
|
||||||
|
|
||||||
static FusionDictionary getDictionary(final String filename, final boolean report) {
|
static FusionDictionary getDictionary(final String filename, final boolean report) {
|
||||||
final File file = new File(filename);
|
final File file = new File(filename);
|
||||||
if (report) {
|
if (report) {
|
||||||
|
@ -170,28 +193,28 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
System.out.println("Size : " + file.length() + " bytes");
|
System.out.println("Size : " + file.length() + " bytes");
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file);
|
final DecoderChainSpec<File> decodedSpec = getRawDictionaryOrNull(file);
|
||||||
if (null == decodedSpec) {
|
if (null == decodedSpec) {
|
||||||
throw new RuntimeException("Does not seem to be a dictionary file " + filename);
|
throw new RuntimeException("Does not seem to be a dictionary file " + filename);
|
||||||
}
|
}
|
||||||
if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mFile.getAbsolutePath())) {
|
if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mResult.getAbsolutePath())) {
|
||||||
if (report) {
|
if (report) {
|
||||||
System.out.println("Format : Combined format");
|
System.out.println("Format : Combined format");
|
||||||
System.out.println("Packaging : " + decodedSpec.describeChain());
|
System.out.println("Packaging : " + decodedSpec.describeChain());
|
||||||
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
|
System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
|
||||||
}
|
}
|
||||||
try (final BufferedReader reader = new BufferedReader(
|
try (final BufferedReader reader = new BufferedReader(
|
||||||
new InputStreamReader(new FileInputStream(decodedSpec.mFile), "UTF-8"))) {
|
new InputStreamReader(new FileInputStream(decodedSpec.mResult), "UTF-8"))) {
|
||||||
return CombinedInputOutput.readDictionaryCombined(reader);
|
return CombinedInputOutput.readDictionaryCombined(reader);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(
|
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(
|
||||||
decodedSpec.mFile, 0, decodedSpec.mFile.length(),
|
decodedSpec.mResult, 0, decodedSpec.mResult.length(),
|
||||||
DictDecoder.USE_BYTEARRAY);
|
DictDecoder.USE_BYTEARRAY);
|
||||||
if (report) {
|
if (report) {
|
||||||
System.out.println("Format : Binary dictionary format");
|
System.out.println("Format : Binary dictionary format");
|
||||||
System.out.println("Packaging : " + decodedSpec.describeChain());
|
System.out.println("Packaging : " + decodedSpec.describeChain());
|
||||||
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
|
System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
|
||||||
}
|
}
|
||||||
return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
|
return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
|
||||||
} catch (final IOException | UnsupportedFormatException e) {
|
} catch (final IOException | UnsupportedFormatException e) {
|
||||||
|
|
|
@ -77,16 +77,16 @@ public class Package {
|
||||||
if (mArgs.length != 2) {
|
if (mArgs.length != 2) {
|
||||||
throw new RuntimeException("Too many/too few arguments for command " + COMMAND);
|
throw new RuntimeException("Too many/too few arguments for command " + COMMAND);
|
||||||
}
|
}
|
||||||
final BinaryDictOffdeviceUtils.DecoderChainSpec decodedSpec =
|
final BinaryDictOffdeviceUtils.DecoderChainSpec<File> decodedSpec =
|
||||||
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(new File(mArgs[0]));
|
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(new File(mArgs[0]));
|
||||||
if (null == decodedSpec) {
|
if (null == decodedSpec) {
|
||||||
System.out.println(mArgs[0] + " does not seem to be a dictionary");
|
System.out.println(mArgs[0] + " does not seem to be a dictionary");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
System.out.println("Packaging : " + decodedSpec.describeChain());
|
System.out.println("Packaging : " + decodedSpec.describeChain());
|
||||||
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
|
System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
|
||||||
try (
|
try (
|
||||||
final InputStream input = getFileInputStream(decodedSpec.mFile);
|
final InputStream input = getFileInputStream(decodedSpec.mResult);
|
||||||
final OutputStream output = new BufferedOutputStream(
|
final OutputStream output = new BufferedOutputStream(
|
||||||
getFileOutputStreamOrStdOut(mArgs[1]))
|
getFileOutputStreamOrStdOut(mArgs[1]))
|
||||||
) {
|
) {
|
||||||
|
|
|
@ -74,11 +74,11 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Test for an actually compressed dictionary and its contents
|
// Test for an actually compressed dictionary and its contents
|
||||||
final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
|
final BinaryDictOffdeviceUtils.DecoderChainSpec<File> decodeSpec =
|
||||||
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst);
|
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst);
|
||||||
assertEquals("Wrong decode spec", "raw > compression", decodeSpec.describeChain());
|
assertEquals("Wrong decode spec", "raw > compression", decodeSpec.describeChain());
|
||||||
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mFile, 0,
|
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mResult, 0,
|
||||||
decodeSpec.mFile.length());
|
decodeSpec.mResult.length());
|
||||||
final FusionDictionary resultDict =
|
final FusionDictionary resultDict =
|
||||||
dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
|
dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
|
||||||
assertEquals("Wrong version attribute", VERSION, resultDict.mOptions.mAttributes.get(
|
assertEquals("Wrong version attribute", VERSION, resultDict.mOptions.mAttributes.get(
|
||||||
|
|
Loading…
Reference in a new issue