Large simplification in obtaining a raw dictionary
That is where the last refactorings were leading. This code is simpler, and it is also far more flexible. Importantly, it only makes a single copy instead of making a full disk copy for every intermediate step. Next we're going to make the "copy" part modular for processes that don't need to copy the whole file.
Change-Id: Ief32ac665d804b9b20c44f443a9c87452ceb367a
main
parent
b1439c2872
commit
ae55db95a7
|
@ -27,6 +27,7 @@ import java.io.BufferedOutputStream;
|
||||||
import java.io.BufferedReader;
|
import java.io.BufferedReader;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
|
@ -50,19 +51,21 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
public static class DecoderChainSpec {
|
public static class DecoderChainSpec {
|
||||||
public final static int COMPRESSION = 1;
|
public final static int COMPRESSION = 1;
|
||||||
public final static int ENCRYPTION = 2;
|
public final static int ENCRYPTION = 2;
|
||||||
private final static int MAX_DECODE_DEPTH = 4;
|
|
||||||
|
|
||||||
final int[] mDecoderSpec;
|
private final static int[][] VALID_DECODER_CHAINS = {
|
||||||
|
{ }, { COMPRESSION }, { ENCRYPTION, COMPRESSION }
|
||||||
|
};
|
||||||
|
|
||||||
|
private final int mDecoderSpecIndex;
|
||||||
File mFile;
|
File mFile;
|
||||||
|
|
||||||
public DecoderChainSpec() {
|
public DecoderChainSpec() {
|
||||||
mDecoderSpec = new int[0];
|
mDecoderSpecIndex = 0;
|
||||||
mFile = null;
|
mFile = null;
|
||||||
}
|
}
|
||||||
|
|
||||||
public DecoderChainSpec(final DecoderChainSpec src, final int newStep) {
|
private DecoderChainSpec(final DecoderChainSpec src) {
|
||||||
mDecoderSpec = Arrays.copyOf(src.mDecoderSpec, src.mDecoderSpec.length + 1);
|
mDecoderSpecIndex = src.mDecoderSpecIndex + 1;
|
||||||
mDecoderSpec[src.mDecoderSpec.length] = newStep;
|
|
||||||
mFile = src.mFile;
|
mFile = src.mFile;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -79,12 +82,37 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
|
|
||||||
public String describeChain() {
|
public String describeChain() {
|
||||||
final StringBuilder s = new StringBuilder("raw");
|
final StringBuilder s = new StringBuilder("raw");
|
||||||
for (final int step : mDecoderSpec) {
|
for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
|
||||||
s.append(" > ");
|
s.append(" > ");
|
||||||
s.append(getStepDescription(step));
|
s.append(getStepDescription(step));
|
||||||
}
|
}
|
||||||
return s.toString();
|
return s.toString();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns the next sequential spec. If exhausted, return null.
|
||||||
|
*/
|
||||||
|
public DecoderChainSpec next() {
|
||||||
|
if (mDecoderSpecIndex + 1 >= VALID_DECODER_CHAINS.length) {
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
return new DecoderChainSpec(this);
|
||||||
|
}
|
||||||
|
|
||||||
|
public InputStream getStream(final File src) throws FileNotFoundException, IOException {
|
||||||
|
InputStream input = new BufferedInputStream(new FileInputStream(src));
|
||||||
|
for (final int step : VALID_DECODER_CHAINS[mDecoderSpecIndex]) {
|
||||||
|
switch (step) {
|
||||||
|
case COMPRESSION:
|
||||||
|
input = Compress.getUncompressedStream(input);
|
||||||
|
break;
|
||||||
|
case ENCRYPTION:
|
||||||
|
input = Crypt.getDecryptedStream(input);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return input;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public static void copy(final InputStream input, final OutputStream output) throws IOException {
|
public static void copy(final InputStream input, final OutputStream output) throws IOException {
|
||||||
|
@ -102,89 +130,39 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
* If this is not a dictionary, the method returns null.
|
* If this is not a dictionary, the method returns null.
|
||||||
*/
|
*/
|
||||||
public static DecoderChainSpec getRawDictionaryOrNull(final File src) {
|
public static DecoderChainSpec getRawDictionaryOrNull(final File src) {
|
||||||
return getRawDictionaryOrNullInternal(new DecoderChainSpec(), src, 0);
|
DecoderChainSpec spec = new DecoderChainSpec();
|
||||||
}
|
|
||||||
|
|
||||||
private static DecoderChainSpec getRawDictionaryOrNullInternal(
|
|
||||||
final DecoderChainSpec spec, final File src, final int depth) {
|
|
||||||
// Unfortunately the decoding scheme we use can consider any data to be encrypted
|
|
||||||
// and will produce some output, meaning it's not possible to reliably detect encrypted
|
|
||||||
// data. Thus, some non-dictionary files (especially small) ones may successfully decrypt
|
|
||||||
// over and over, ending in a stack overflow. Hence we limit the depth at which we try
|
|
||||||
// decoding the file.
|
|
||||||
if (depth > DecoderChainSpec.MAX_DECODE_DEPTH) {
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
if (BinaryDictDecoderUtils.isBinaryDictionary(src)
|
if (BinaryDictDecoderUtils.isBinaryDictionary(src)
|
||||||
|| CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) {
|
|| CombinedInputOutput.isCombinedDictionary(src.getAbsolutePath())) {
|
||||||
spec.mFile = src;
|
spec.mFile = src;
|
||||||
return spec;
|
return spec;
|
||||||
}
|
}
|
||||||
// It's not a raw dictionary - try to see if it's compressed.
|
while (null != spec) {
|
||||||
final File uncompressedFile = tryGetUncompressedFile(src);
|
try {
|
||||||
if (null != uncompressedFile) {
|
final File dst = File.createTempFile(PREFIX, SUFFIX);
|
||||||
final DecoderChainSpec newSpec =
|
dst.deleteOnExit();
|
||||||
getRawDictionaryOrNullInternal(spec, uncompressedFile, depth + 1);
|
try (final InputStream input = spec.getStream(src);
|
||||||
if (null == newSpec) return null;
|
final OutputStream output =
|
||||||
return new DecoderChainSpec(newSpec, DecoderChainSpec.COMPRESSION);
|
new BufferedOutputStream(new FileOutputStream(dst))) {
|
||||||
}
|
copy(input, output);
|
||||||
// It's not a compressed either - try to see if it's crypted.
|
output.flush();
|
||||||
final File decryptedFile = tryGetDecryptedFile(src);
|
output.close();
|
||||||
if (null != decryptedFile) {
|
if (BinaryDictDecoderUtils.isBinaryDictionary(dst)
|
||||||
final DecoderChainSpec newSpec =
|
|| CombinedInputOutput.isCombinedDictionary(
|
||||||
getRawDictionaryOrNullInternal(spec, decryptedFile, depth + 1);
|
dst.getAbsolutePath())) {
|
||||||
if (null == newSpec) return null;
|
spec.mFile = dst;
|
||||||
return new DecoderChainSpec(newSpec, DecoderChainSpec.ENCRYPTION);
|
return spec;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} catch (IOException e) {
|
||||||
|
// This was not the right format, fall through and try the next
|
||||||
|
System.out.println("Rejecting " + spec.describeChain() + " : " + e);
|
||||||
|
System.out.println(e.getStackTrace()[0].toString());
|
||||||
|
}
|
||||||
|
spec = spec.next();
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Try to uncompress the file passed as an argument.
|
|
||||||
*
|
|
||||||
* If the file can be uncompressed, the uncompressed version is returned. Otherwise, null
|
|
||||||
* is returned.
|
|
||||||
*/
|
|
||||||
private static File tryGetUncompressedFile(final File src) {
|
|
||||||
try {
|
|
||||||
final File dst = File.createTempFile(PREFIX, SUFFIX);
|
|
||||||
dst.deleteOnExit();
|
|
||||||
try (
|
|
||||||
final InputStream input = Compress.getUncompressedStream(
|
|
||||||
new BufferedInputStream(new FileInputStream(src)));
|
|
||||||
final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
|
|
||||||
) {
|
|
||||||
copy(input, output);
|
|
||||||
return dst;
|
|
||||||
}
|
|
||||||
} catch (final IOException e) {
|
|
||||||
// Could not uncompress the file: presumably the file is simply not a compressed file
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Try to decrypt the file passed as an argument.
|
|
||||||
*
|
|
||||||
* If the file can be decrypted, the decrypted version is returned. Otherwise, null
|
|
||||||
* is returned.
|
|
||||||
*/
|
|
||||||
private static File tryGetDecryptedFile(final File src) {
|
|
||||||
try {
|
|
||||||
final File dst = File.createTempFile(PREFIX, SUFFIX);
|
|
||||||
dst.deleteOnExit();
|
|
||||||
try (
|
|
||||||
final InputStream input = Crypt.getDecryptedStream(
|
|
||||||
new BufferedInputStream(new FileInputStream(src)));
|
|
||||||
final OutputStream output = new BufferedOutputStream(new FileOutputStream(dst))
|
|
||||||
) {
|
|
||||||
copy(input, output);
|
|
||||||
return dst;
|
|
||||||
}
|
|
||||||
} catch (final IOException e) {
|
|
||||||
// Could not decrypt the file: presumably the file is simply not a crypted file
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static FusionDictionary getDictionary(final String filename, final boolean report) {
|
static FusionDictionary getDictionary(final String filename, final boolean report) {
|
||||||
final File file = new File(filename);
|
final File file = new File(filename);
|
||||||
if (report) {
|
if (report) {
|
||||||
|
|
|
@ -68,9 +68,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
||||||
final File dst = File.createTempFile("testGetRawDict", ".tmp");
|
final File dst = File.createTempFile("testGetRawDict", ".tmp");
|
||||||
dst.deleteOnExit();
|
dst.deleteOnExit();
|
||||||
try (final OutputStream out = Compress.getCompressedStream(
|
try (final OutputStream out = Compress.getCompressedStream(
|
||||||
Compress.getCompressedStream(
|
new BufferedOutputStream(new FileOutputStream(dst)))) {
|
||||||
Compress.getCompressedStream(
|
|
||||||
new BufferedOutputStream(new FileOutputStream(dst)))))) {
|
|
||||||
final DictEncoder dictEncoder = new Ver2DictEncoder(out);
|
final DictEncoder dictEncoder = new Ver2DictEncoder(out);
|
||||||
dictEncoder.writeDictionary(dict, new FormatOptions(2, false));
|
dictEncoder.writeDictionary(dict, new FormatOptions(2, false));
|
||||||
}
|
}
|
||||||
|
@ -78,11 +76,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
||||||
// Test for an actually compressed dictionary and its contents
|
// Test for an actually compressed dictionary and its contents
|
||||||
final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
|
final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
|
||||||
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst);
|
BinaryDictOffdeviceUtils.getRawDictionaryOrNull(dst);
|
||||||
for (final int step : decodeSpec.mDecoderSpec) {
|
assertEquals("Wrong decode spec", "raw > compression", decodeSpec.describeChain());
|
||||||
assertEquals("Wrong decode spec",
|
|
||||||
BinaryDictOffdeviceUtils.DecoderChainSpec.COMPRESSION, step);
|
|
||||||
}
|
|
||||||
assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.length);
|
|
||||||
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mFile, 0,
|
final DictDecoder dictDecoder = BinaryDictIOUtils.getDictDecoder(decodeSpec.mFile, 0,
|
||||||
decodeSpec.mFile.length());
|
decodeSpec.mFile.length());
|
||||||
final FusionDictionary resultDict =
|
final FusionDictionary resultDict =
|
||||||
|
|
Loading…
Reference in New Issue