Genericize getting a raw dictionary

This makes it possible to avoid copying the whole dictionary when
only the header is needed.

Change-Id: Ie4a649b507ccd4a430201824ed87b8b8bbf55e9f
This commit is contained in:
Jean Chalard 2014-10-21 16:20:19 +09:00
parent ab68143ad0
commit 5564317f83
3 changed files with 71 additions and 48 deletions

View file

@ -35,6 +35,9 @@ import java.io.InputStreamReader;
import java.io.OutputStream; import java.io.OutputStream;
import java.util.Arrays; import java.util.Arrays;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
/** /**
* Class grouping utilities for offline dictionary making. * Class grouping utilities for offline dictionary making.
* *
@ -45,10 +48,9 @@ public final class BinaryDictOffdeviceUtils {
// Prefix and suffix are arbitrary, the values do not really matter // Prefix and suffix are arbitrary, the values do not really matter
private final static String PREFIX = "dicttool"; private final static String PREFIX = "dicttool";
private final static String SUFFIX = ".tmp"; private final static String SUFFIX = ".tmp";
private final static int COPY_BUFFER_SIZE = 8192; private final static int COPY_BUFFER_SIZE = 8192;
public static class DecoderChainSpec { public static class DecoderChainSpec<T> {
public final static int COMPRESSION = 1; public final static int COMPRESSION = 1;
public final static int ENCRYPTION = 2; public final static int ENCRYPTION = 2;
@ -57,16 +59,16 @@ public final class BinaryDictOffdeviceUtils {
}; };
private final int mDecoderSpecIndex; private final int mDecoderSpecIndex;
File mFile; T mResult;
public DecoderChainSpec() { public DecoderChainSpec() {
mDecoderSpecIndex = 0; mDecoderSpecIndex = 0;
mFile = null; mResult = null;
} }
private DecoderChainSpec(final DecoderChainSpec src) { private DecoderChainSpec(final DecoderChainSpec<T> src) {
mDecoderSpecIndex = src.mDecoderSpecIndex + 1; mDecoderSpecIndex = src.mDecoderSpecIndex + 1;
mFile = src.mFile; mResult = src.mResult;
} }
private String getStepDescription(final int step) { private String getStepDescription(final int step) {
@ -115,6 +117,31 @@ public final class BinaryDictOffdeviceUtils {
} }
} }
/**
 * A processing step applied to an input stream to produce a result of type T.
 *
 * An implementation reports that it cannot handle the stream it was given by throwing
 * one of the declared exceptions.
 *
 * @param <T> the type of the result produced from the stream
 */
public interface InputProcessor<T> {
    /**
     * Reads the given stream and produces a result from it.
     *
     * @param input the stream to read from
     * @return the result of processing the stream
     * @throws IOException if reading the stream fails
     * @throws UnsupportedFormatException if the stream contents are not in a format
     *         this processor accepts
     */
    @Nonnull
    T process(@Nonnull final InputStream input) throws IOException, UnsupportedFormatException;
}
/**
 * An input processor that copies the stream to a temporary file and accepts it only if
 * the resulting file is recognized as a binary or a combined dictionary.
 */
public static class CopyProcessor implements InputProcessor<File> {
    /**
     * Copies the stream to a new temporary file and checks the file's format.
     *
     * The temporary file is removed again on every path that does not return it, so
     * rejected candidates do not accumulate on disk until the JVM exits.
     *
     * @param input the stream to copy
     * @return the temporary file holding the copied contents
     * @throws IOException if creating the file or copying the stream fails
     * @throws UnsupportedFormatException if the copied file is neither a binary
     *         dictionary nor a combined dictionary
     */
    @Override @Nonnull
    public File process(@Nonnull final InputStream input) throws IOException,
            UnsupportedFormatException {
        final File dst = File.createTempFile(PREFIX, SUFFIX);
        // Safety net in case the explicit clean-up below cannot run or fails.
        dst.deleteOnExit();
        boolean accepted = false;
        try {
            // Closing the stream here flushes it, so the whole content is on disk
            // before the format checks read the file back.
            try (final OutputStream output =
                    new BufferedOutputStream(new FileOutputStream(dst))) {
                copy(input, output);
            }
            if (BinaryDictDecoderUtils.isBinaryDictionary(dst)
                    || CombinedInputOutput.isCombinedDictionary(dst.getAbsolutePath())) {
                accepted = true;
                return dst;
            }
            throw new UnsupportedFormatException("Input stream not at the expected format");
        } finally {
            if (!accepted) {
                // Best effort: the result is ignored because deleteOnExit() still applies.
                dst.delete();
            }
        }
    }
}
public static void copy(final InputStream input, final OutputStream output) throws IOException { public static void copy(final InputStream input, final OutputStream output) throws IOException {
final byte[] buffer = new byte[COPY_BUFFER_SIZE]; final byte[] buffer = new byte[COPY_BUFFER_SIZE];
for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) { for (int readBytes = input.read(buffer); readBytes >= 0; readBytes = input.read(buffer)) {
@ -123,46 +150,42 @@ public final class BinaryDictOffdeviceUtils {
} }
/** /**
* Returns a decrypted/uncompressed dictionary. * Process a dictionary, decrypting/uncompressing it on the fly as necessary.
* *
* This will decrypt/uncompress any number of times as necessary until it finds the * This will execute the given processor repeatedly with the possible alternatives
* dictionary signature, and copy the decoded file to a temporary place. * for dictionary format until the processor does not throw an exception.
* If this is not a dictionary, the method returns null. * If the processor succeeds for none of the possible formats, the method returns null.
*/ */
public static DecoderChainSpec getRawDictionaryOrNull(final File src) { @Nullable
DecoderChainSpec spec = new DecoderChainSpec(); public static <T> DecoderChainSpec<T> decodeDictionaryForProcess(@Nonnull final File src,
if (BinaryDictDecoderUtils.isBinaryDictionary(src) @Nonnull final InputProcessor<T> processor) {
|| CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) { @Nonnull DecoderChainSpec spec = new DecoderChainSpec();
spec.mFile = src;
return spec;
}
while (null != spec) { while (null != spec) {
try { try {
final File dst = File.createTempFile(PREFIX, SUFFIX); try (final InputStream input = spec.getStream(src)) {
dst.deleteOnExit(); spec.mResult = processor.process(input);
try (final InputStream input = spec.getStream(src);
final OutputStream output =
new BufferedOutputStream(new FileOutputStream(dst))) {
copy(input, output);
output.flush();
output.close();
if (BinaryDictDecoderUtils.isBinaryDictionary(dst)
|| CombinedInputOutput.isCombinedDictionary(
dst.getAbsolutePath())) {
spec.mFile = dst;
return spec; return spec;
} }
} } catch (IOException | UnsupportedFormatException e) {
} catch (IOException e) { // If the format is not the right one for this file, the processor will throw one
// This was not the right format, fall through and try the next // of these exceptions. In our case, that means we should try the next spec,
System.out.println("Rejecting " + spec.describeChain() + " : " + e); // since it may still be at another format we haven't tried yet.
System.out.println(e.getStackTrace()[0].toString()); // TODO: stop using exceptions for this non-exceptional case.
} }
spec = spec.next(); spec = spec.next();
} }
return null; return null;
} }
/**
 * Decodes the given file into a raw dictionary file.
 *
 * This delegates to decodeDictionaryForProcess with a CopyProcessor, so on success the
 * returned spec carries a new file on the disk that the client may treat as it wants.
 *
 * @param src the file to decode
 * @return the decoder chain spec holding the raw dictionary file, or null if the
 *         processor succeeded for none of the possible formats
 */
@Nullable
public static DecoderChainSpec<File> getRawDictionaryOrNull(@Nonnull final File src) {
    final InputProcessor<File> copier = new CopyProcessor();
    return decodeDictionaryForProcess(src, copier);
}
static FusionDictionary getDictionary(final String filename, final boolean report) { static FusionDictionary getDictionary(final String filename, final boolean report) {
final File file = new File(filename); final File file = new File(filename);
if (report) { if (report) {
@ -170,28 +193,28 @@ public final class BinaryDictOffdeviceUtils {
System.out.println("Size : " + file.length() + " bytes"); System.out.println("Size : " + file.length() + " bytes");
} }
try { try {
final DecoderChainSpec decodedSpec = getRawDictionaryOrNull(file); final DecoderChainSpec<File> decodedSpec = getRawDictionaryOrNull(file);
if (null == decodedSpec) { if (null == decodedSpec) {
throw new RuntimeException("Does not seem to be a dictionary file " + filename); throw new RuntimeException("Does not seem to be a dictionary file " + filename);
} }
if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mFile.getAbsolutePath())) { if (CombinedInputOutput.isCombinedDictionary(decodedSpec.mResult.getAbsolutePath())) {
if (report) { if (report) {
System.out.println("Format : Combined format"); System.out.println("Format : Combined format");
System.out.println("Packaging : " + decodedSpec.describeChain()); System.out.println("Packaging : " + decodedSpec.describeChain());
System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
} }
try (final BufferedReader reader = new BufferedReader( try (final BufferedReader reader = new BufferedReader(
new InputStreamReader(new FileInputStream(decodedSpec.mFile), "UTF-8"))) { new InputStreamReader(new FileInputStream(decodedSpec.mResult), "UTF-8"))) {
return CombinedInputOutput.readDictionaryCombined(reader); return CombinedInputOutput.readDictionaryCombined(reader);
} }
} }
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder( final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(
decodedSpec.mFile, 0, decodedSpec.mFile.length(), decodedSpec.mResult, 0, decodedSpec.mResult.length(),
DictDecoder.USE_BYTEARRAY); DictDecoder.USE_BYTEARRAY);
if (report) { if (report) {
System.out.println("Format : Binary dictionary format"); System.out.println("Format : Binary dictionary format");
System.out.println("Packaging : " + decodedSpec.describeChain()); System.out.println("Packaging : " + decodedSpec.describeChain());
System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
} }
return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */); return dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
} catch (final IOException | UnsupportedFormatException e) { } catch (final IOException | UnsupportedFormatException e) {

View file

@ -77,16 +77,16 @@ public class Package {
if (mArgs.length != 2) { if (mArgs.length != 2) {
throw new RuntimeException("Too many/too few arguments for command " + COMMAND); throw new RuntimeException("Too many/too few arguments for command " + COMMAND);
} }
final BinaryDictOffdeviceUtils.DecoderChainSpec decodedSpec = final BinaryDictOffdeviceUtils.DecoderChainSpec<File> decodedSpec =
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(new File(mArgs[0])); BinaryDictOffdeviceUtils.getRawDictionaryOrNull(new File(mArgs[0]));
if (null == decodedSpec) { if (null == decodedSpec) {
System.out.println(mArgs[0] + " does not seem to be a dictionary"); System.out.println(mArgs[0] + " does not seem to be a dictionary");
return; return;
} }
System.out.println("Packaging : " + decodedSpec.describeChain()); System.out.println("Packaging : " + decodedSpec.describeChain());
System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); System.out.println("Uncompressed size : " + decodedSpec.mResult.length());
try ( try (
final InputStream input = getFileInputStream(decodedSpec.mFile); final InputStream input = getFileInputStream(decodedSpec.mResult);
final OutputStream output = new BufferedOutputStream( final OutputStream output = new BufferedOutputStream(
getFileOutputStreamOrStdOut(mArgs[1])) getFileOutputStreamOrStdOut(mArgs[1]))
) { ) {

View file

@ -74,11 +74,11 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
} }
// Test for an actually compressed dictionary and its contents // Test for an actually compressed dictionary and its contents
final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec = final BinaryDictOffdeviceUtils.DecoderChainSpec<File> decodeSpec =
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst); BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst);
assertEquals("Wrong decode spec", "raw > compression", decodeSpec.describeChain()); assertEquals("Wrong decode spec", "raw > compression", decodeSpec.describeChain());
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mFile, 0, final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mResult, 0,
decodeSpec.mFile.length()); decodeSpec.mResult.length());
final FusionDictionary resultDict = final FusionDictionary resultDict =
dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */); dictDecoder.readDictionaryBinary(false /* deleteDictIfBroken */);
assertEquals("Wrong version attribute", VERSION, resultDict.mOptions.mAttributes.get( assertEquals("Wrong version attribute", VERSION, resultDict.mOptions.mAttributes.get(