Large simplification in obtaining a raw dictionary

That is where the last refactorings were leading. This code is
simpler, yet far more flexible. Importantly, it makes only
a single copy instead of making a full disk copy for every
intermediate step.
Next we're going to make the "copy" part modular for processes
that don't need to copy the whole file.

Change-Id: Ief32ac665d804b9b20c44f443a9c87452ceb367a
main
Jean Chalard 2014-10-21 15:37:22 +09:00
parent b1439c2872
commit ae55db95a7
2 changed files with 61 additions and 89 deletions

View File

@ -27,6 +27,7 @@ import java.io.BufferedOutputStream;
import java.io.BufferedReader; import java.io.BufferedReader;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream; import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
import java.io.InputStream; import java.io.InputStream;
@ -50,19 +51,21 @@ public final class BinaryDictOffdeviceUtils {
public static class DecoderChainSpec { public static class DecoderChainSpec {
public final static int COMPRESSION = 1; public final static int COMPRESSION = 1;
public final static int ENCRYPTION = 2; public final static int ENCRYPTION = 2;
private final static int MAX_DECODE_DEPTH = 4;
final int[] mDecoderSpec; private final static int[][] VALID_DECODER_CHAINS = {
{ }, { COMPRESSION }, { ENCRYPTION, COMPRESSION }
};
private final int mDecoderSpecIndex;
File mFile; File mFile;
public DecoderChainSpec() { public DecoderChainSpec() {
mDecoderSpec = new int[0]; mDecoderSpecIndex = 0;
mFile = null; mFile = null;
} }
public DecoderChainSpec(final DecoderChainSpec src, final int newStep) { private DecoderChainSpec(final DecoderChainSpec src) {
mDecoderSpec = Arrays.copyOf(src.mDecoderSpec, src.mDecoderSpec.length + 1); mDecoderSpecIndex = src.mDecoderSpecIndex + 1;
mDecoderSpec[src.mDecoderSpec.length] = newStep;
mFile = src.mFile; mFile = src.mFile;
} }
@ -79,12 +82,37 @@ public final class BinaryDictOffdeviceUtils {
public String describeChain() { public String describeChain() {
final StringBuilder s = new StringBuilder("raw"); final StringBuilder s = new StringBuilder("raw");
for (final int step : mDecoderSpec) { for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
s.append(" > "); s.append(" > ");
s.append(getStepDescription(step)); s.append(getStepDescription(step));
} }
return s.toString(); return s.toString();
} }
/**
* Returns the next sequential spec. If exhausted, return null.
*/
public DecoderChainSpec next() {
if (mDecoderSpecIndex + 1 >= VALID_DECODER_CHAINS.length) {
return null;
}
return new DecoderChainSpec(this);
}
public InputStream getStream(final File src) throws FileNotFoundException, IOException {
InputStream input = new BufferedInputStream(new FileInputStream(src));
for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
switch (step) {
case COMPRESSION:
input = Compress.getUncompressedStream(input);
break;
case ENCRYPTION:
input = Crypt.getDecryptedStream(input);
break;
}
}
return input;
}
} }
public static void copy(final InputStream input, final OutputStream output) throws IOException { public static void copy(final InputStream input, final OutputStream output) throws IOException {
@ -102,88 +130,38 @@ public final class BinaryDictOffdeviceUtils {
* If this is not a dictionary, the method returns null. * If this is not a dictionary, the method returns null.
*/ */
public static DecoderChainSpec getRawDictionaryOrNull(final File src) { public static DecoderChainSpec getRawDictionaryOrNull(final File src) {
return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0); DecoderChainSpec spec = new DecoderChainSpec();
}
private static DecoderChainSpec getRawDictionaryOrNullInternal(
final DecoderChainSpec spec, final File src, final int depth) {
// Unfortunately the decoding scheme we use can consider any data to be encrypted
// and will produce some output, meaning it's not possible to reliably detect encrypted
// data. Thus, some non-dictionary files (especially small) ones may successfully decrypt
// over and over, ending in a stack overflow. Hence we limit the depth at which we try
// decoding the file.
if (depth > DecoderChainSpec.MAX_DECODE_DEPTH) {
return null;
}
if (BinaryDictDecoderUtils.isBinaryDictionary(src) if (BinaryDictDecoderUtils.isBinaryDictionary(src)
|| CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) { || CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) {
spec.mFile = src; spec.mFile = src;
return spec; return spec;
} }
// It's not a raw dictionary - try to see if it's compressed. while (null != spec) {
final File uncompressedFile = tryGetUncompressedFile(src);
if (null != uncompressedFile) {
final DecoderChainSpec newSpec =
getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1);
if (null == newSpec) return null;
return new DecoderChainSpec(newSpec, DecoderChainSpec.COMPRESSION);
}
// It's not a compressed either - try to see if it's crypted.
final File decryptedFile = tryGetDecryptedFile(src);
if (null != decryptedFile) {
final DecoderChainSpec newSpec =
getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1);
if (null == newSpec) return null;
return new DecoderChainSpec(newSpec, DecoderChainSpec.ENCRYPTION);
}
return null;
}
/* Try to uncompress the file passed as an argument.
*
* If the file can be uncompressed, the uncompressed version is returned. Otherwise, null
* is returned.
*/
private static File tryGetUncompressedFile(final File src) {
try { try {
final File dst = File.createTempFile(PREFIX, SUFFIX); final File dst = File.createTempFile(PREFIX, SUFFIX);
dst.deleteOnExit(); dst.deleteOnExit();
try ( try (final InputStream input = spec.getStream(src);
final InputStream input = Compress.getUncompressedStream( final OutputStream output =
new BufferedInputStream(new FileInputStream(src))); new BufferedOutputStream(new FileOutputStream(dst))) {
final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
) {
copy(input, output); copy(input, output);
return dst; output.flush();
output.close();
if (BinaryDictDecoderUtils.isBinaryDictionary(dst)
|| CombinedInputOutput.isCombinedDictionary(
dst.getAbsolutePath())) {
spec.mFile = dst;
return spec;
}
}
} catch (IOException e) {
// This was not the right format, fall through and try the next
System.out.println("Rejecting " + spec.describeChain() + " : " + e);
System.out.println(e.getStackTrace()[0].toString());
}
spec = spec.next();
} }
} catch (final IOException e) {
// Could not uncompress the file: presumably the file is simply not a compressed file
return null; return null;
} }
}
/* Try to decrypt the file passed as an argument.
*
* If the file can be decrypted, the decrypted version is returned. Otherwise, null
* is returned.
*/
private static File tryGetDecryptedFile(final File src) {
try {
final File dst = File.createTempFile(PREFIX, SUFFIX);
dst.deleteOnExit();
try (
final InputStream input = Crypt.getDecryptedStream(
new BufferedInputStream(new FileInputStream(src)));
final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
) {
copy(input, output);
return dst;
}
} catch (final IOException e) {
// Could not decrypt the file: presumably the file is simply not a crypted file
return null;
}
}
static FusionDictionary getDictionary(final String filename, final boolean report) { static FusionDictionary getDictionary(final String filename, final boolean report) {
final File file = new File(filename); final File file = new File(filename);

View File

@ -68,9 +68,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
final File dst = File.createTempFile("testGetRawDict", ".tmp"); final File dst = File.createTempFile("testGetRawDict", ".tmp");
dst.deleteOnExit(); dst.deleteOnExit();
try (final OutputStream out = Compress.getCompressedStream( try (final OutputStream out = Compress.getCompressedStream(
Compress.getCompressedStream( new BufferedOutputStream(new FileOutputStream(dst)))) {
Compress.getCompressedStream(
new BufferedOutputStream(new FileOutputStream(dst)))))) {
final DictEncoder dictEncoder = new Ver2DictEncoder(out); final DictEncoder dictEncoder = new Ver2DictEncoder(out);
dictEncoder.writeDictionary(dict, new FormatOptions(2, false)); dictEncoder.writeDictionary(dict, new FormatOptions(2, false));
} }
@ -78,11 +76,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
// Test for an actually compressed dictionary and its contents // Test for an actually compressed dictionary and its contents
final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec = final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst); BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst);
for (final int step : decodeSpec.mDecoderSpec) { assertEquals("Wrong decode spec", "raw > compression", decodeSpec.describeChain());
assertEquals("Wrong decode spec",
BinaryDictOffdeviceUtils.DecoderChainSpec.COMPRESSION, step);
}
assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.length);
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mFile, 0, final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mFile, 0,
decodeSpec.mFile.length()); decodeSpec.mFile.length());
final FusionDictionary resultDict = final FusionDictionary resultDict =