Add auto detection and decoding of dictionary files. (A2)

Bug: 7388852
Change-Id: I25e755fc15f5b383acc046f668e9681efa4f0c2f
This commit is contained in:
Jean Chalard 2012-10-23 17:14:12 +09:00
parent 77fe603a3d
commit b3c98901c5
6 changed files with 206 additions and 14 deletions

View file

@ -1698,6 +1698,14 @@ public final class BinaryDictInputOutput {
return newDict;
}
/**
 * Convenience overload of isBinaryDictionary that accepts a path instead of a File object.
 *
 * @param filename The name of the file to test.
 * @return whatever the File-based isBinaryDictionary returns for a File built from filename.
 */
public static boolean isBinaryDictionary(final String filename) {
    // Wrap the name into a File and let the File-based overload do the actual check.
    return isBinaryDictionary(new File(filename));
}
/**
* Basic test to find out whether the file is a binary dictionary or not.
*
@ -1706,10 +1714,9 @@ public final class BinaryDictInputOutput {
* @param file The file to test.
* @return true if it's a binary dictionary, false otherwise
*/
public static boolean isBinaryDictionary(final String filename) {
public static boolean isBinaryDictionary(final File file) {
FileInputStream inStream = null;
try {
final File file = new File(filename);
inStream = new FileInputStream(file);
final ByteBuffer buffer = inStream.getChannel().map(
FileChannel.MapMode.READ_ONLY, 0, file.length());

View file

@ -16,6 +16,7 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants;
import java.util.ArrayList;
@ -141,6 +142,11 @@ public final class FusionDictionary implements Iterable<Word> {
return NOT_A_TERMINAL != mFrequency;
}
/**
 * Returns the value of mFrequency as stored, without any interpretation.
 *
 * @return the current value of the mFrequency field.
 */
@UsedForTesting
public int getFrequency() {
    return this.mFrequency;
}
public boolean hasSeveralChars() {
assert(mChars.length > 0);
return 1 < mChars.length;

View file

@ -16,19 +16,42 @@
package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
import java.io.File;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.ArrayList;
/**
* Class grouping utilities for offline dictionary making.
*
* Those should not be used on-device, essentially because they are quite
* liberal about I/O and performance.
*/
public class BinaryDictOffdeviceUtils {
* Class grouping utilities for offline dictionary making.
*
* Those should not be used on-device, essentially because they are quite
* liberal about I/O and performance.
*/
public final class BinaryDictOffdeviceUtils {
// Prefix and suffix are arbitrary, the values do not really matter
private final static String PREFIX = "dicttool";
private final static String SUFFIX = ".tmp";
public final static String COMPRESSION = "compression";
/**
 * Records a chain of decoding steps together with the file they apply to.
 */
public static class DecoderChainSpec {
    // Descriptions of the decoding steps, in the order in which addStep() received them.
    ArrayList<String> mDecoderSpec = new ArrayList<String>();
    // Never assigned by this class itself; left null until a user of the spec sets it.
    File mFile;

    /**
     * Appends the description of one decoding step to this spec.
     *
     * @param stepDescription the description of the step to record.
     * @return this same spec, so that calls can be chained.
     */
    public DecoderChainSpec addStep(final String stepDescription) {
        this.mDecoderSpec.add(stepDescription);
        return this;
    }
}
public static void copy(final InputStream input, final OutputStream output) throws IOException {
final byte[] buffer = new byte[1000];
final BufferedInputStream in = new BufferedInputStream(input);
@ -38,4 +61,51 @@ public class BinaryDictOffdeviceUtils {
in.close();
out.close();
}
/**
 * Returns a decoded binary dictionary, or null if the file is not a binary dictionary.
 *
 * The source file is decoded as many times as necessary until the binary dictionary
 * signature is found, each decoded version being copied to a temporary file. The only
 * decoding attempted here is the uncompression performed by Compress.
 *
 * @param src the file to examine.
 * @return a spec holding the raw dictionary file and one recorded step per decoding that
 *         was applied, or null if no binary dictionary could be found.
 */
public static DecoderChainSpec getRawBinaryDictionaryOrNull(final File src) {
    // Start from an empty chain; the internal method fills it in as it decodes.
    final DecoderChainSpec emptySpec = new DecoderChainSpec();
    return getRawBinaryDictionaryOrNullInternal(emptySpec, src);
}
/**
 * Recursive worker for getRawBinaryDictionaryOrNull.
 *
 * @param spec the spec to fill in; it is only modified when a binary dictionary is found.
 * @param src the file to examine at this level of the recursion.
 * @return spec, with mFile set and one COMPRESSION step added per uncompression that was
 *         needed, or null if src does not lead to a binary dictionary.
 */
private static DecoderChainSpec getRawBinaryDictionaryOrNullInternal(
        final DecoderChainSpec spec, final File src) {
    // TODO: arrange for the intermediary files to be deleted
    if (BinaryDictInputOutput.isBinaryDictionary(src)) {
        // Found the raw dictionary: this ends the recursion.
        spec.mFile = src;
        return spec;
    }
    // It's not a raw dictionary - try to see if it's compressed.
    final File inflatedFile = tryGetUncompressedFile(src);
    if (null == inflatedFile) {
        // Neither a dictionary nor something we know how to decode.
        return null;
    }
    final DecoderChainSpec innerSpec =
            getRawBinaryDictionaryOrNullInternal(spec, inflatedFile);
    // The step is recorded on the way back up, and only if a dictionary was found below.
    return null == innerSpec ? null : innerSpec.addStep(COMPRESSION);
}
/**
 * Try to uncompress the file passed as an argument.
 *
 * The uncompressed data is written to a newly created temporary file. If the file cannot be
 * uncompressed, any stream opened by this method is closed and the temporary file is deleted,
 * so that a failed attempt leaves nothing behind.
 *
 * @param src the file to try to uncompress.
 * @return the temporary file containing the uncompressed data, or null if src could not be
 *         uncompressed.
 */
private static File tryGetUncompressedFile(final File src) {
    File dst = null;
    InputStream in = null;
    OutputStream out = null;
    try {
        dst = File.createTempFile(PREFIX, SUFFIX);
        out = new BufferedOutputStream(new FileOutputStream(dst));
        in = new BufferedInputStream(new FileInputStream(src));
        // If this throws, 'in' still refers to the raw stream, which gets closed below.
        in = Compress.getUncompressedStream(in);
        copy(in, out); // #copy() closes the streams
        return dst;
    } catch (IOException e) {
        // Could not uncompress the file: presumably the file is simply not a compressed file.
        // Nothing closed the streams on this path, so do it here; failures while closing are
        // ignored on purpose since the attempt is being abandoned anyway.
        if (null != in) {
            try {
                in.close();
            } catch (IOException ignored) {
                // Best effort only.
            }
        }
        if (null != out) {
            try {
                out.close();
            } catch (IOException ignored) {
                // Best effort only.
            }
        }
        // The temporary file is never handed to the caller on failure: do not leave it around.
        if (null != dst) {
            dst.delete();
        }
        return null;
    }
}
}

View file

@ -16,6 +16,8 @@
package com.android.inputmethod.latin.dicttool;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
@ -27,12 +29,12 @@ import java.util.zip.GZIPOutputStream;
public class Compress {
private static OutputStream getCompressedStream(final OutputStream out)
/**
 * Wraps a stream so that everything written to the result is gzip-compressed into it.
 *
 * @param out the stream that receives the compressed bytes.
 * @return a GZIPOutputStream writing to out.
 * @throws java.io.IOException if the GZIPOutputStream cannot be created.
 */
public static OutputStream getCompressedStream(final OutputStream out)
        throws java.io.IOException {
    final OutputStream gzipSink = new GZIPOutputStream(out);
    return gzipSink;
}
private static InputStream getUncompressedStream(final InputStream in) throws IOException {
/**
 * Wraps a stream of gzip-compressed data so that reading the result yields the
 * uncompressed bytes.
 *
 * @param in the stream supplying the compressed bytes.
 * @return a GZIPInputStream reading from in.
 * @throws IOException if the GZIPInputStream cannot be created from in.
 */
public static InputStream getUncompressedStream(final InputStream in) throws IOException {
    final InputStream gzipSource = new GZIPInputStream(in);
    return gzipSource;
}
@ -55,9 +57,9 @@ public class Compress {
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
: new FileInputStream(new File(inFilename));
: new BufferedInputStream(new FileInputStream(new File(inFilename)));
final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
: new FileOutputStream(new File(outFilename));
: new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
BinaryDictOffdeviceUtils.copy(input, new GZIPOutputStream(output));
}
}
@ -81,9 +83,9 @@ public class Compress {
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
: new FileInputStream(new File(inFilename));
: new BufferedInputStream(new FileInputStream(new File(inFilename)));
final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
: new FileOutputStream(new File(outFilename));
: new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
BinaryDictOffdeviceUtils.copy(new GZIPInputStream(input), output);
}
}

View file

@ -0,0 +1,106 @@
/*
* Copyright (C) 2012 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import junit.framework.TestCase;
import java.io.File;
import java.io.BufferedOutputStream;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.HashMap;
/**
 * Unit tests for BinaryDictOffdeviceUtils
 */
public class BinaryDictOffdeviceUtilsTests extends TestCase {
    private static final int TEST_FREQ = 37; // Some arbitrary value unlikely to happen by chance
    // The garbage files are made of this 4-byte pattern repeated GARBAGE_WORD_COUNT times,
    // which amounts to 4k of data.
    private static final int GARBAGE_WORD = 0x12345678;
    private static final int GARBAGE_WORD_COUNT = 1024;

    /**
     * Checks that a dictionary compressed three times is detected, that the decoder chain
     * reports three compression steps, and that the decoded file can be read back.
     */
    public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
        // Create a thrice-compressed dictionary file.
        final FusionDictionary dict = new FusionDictionary(new Node(),
                new DictionaryOptions(new HashMap<String, String>(),
                        false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
        dict.add("foo", TEST_FREQ, null, false /* isNotAWord */);
        dict.add("fta", 1, null, false /* isNotAWord */);
        dict.add("ftb", 1, null, false /* isNotAWord */);
        dict.add("bar", 1, null, false /* isNotAWord */);
        dict.add("fool", 1, null, false /* isNotAWord */);
        final File dst = File.createTempFile("testGetRawDict", ".tmp");
        dst.deleteOnExit();
        final OutputStream out = Compress.getCompressedStream(
                Compress.getCompressedStream(
                        Compress.getCompressedStream(
                                new BufferedOutputStream(new FileOutputStream(dst)))));
        try {
            BinaryDictInputOutput.writeDictionaryBinary(out, dict, new FormatOptions(2, false));
        } finally {
            // Closing a stream that is already closed has no effect, so this is safe whether or
            // not writeDictionaryBinary closed it; it guarantees the data is complete on disk.
            out.close();
        }

        // Test for an actually compressed dictionary and its contents
        final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
                BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst);
        // Fail with a readable message rather than a NullPointerException below.
        assertNotNull("Compressed dictionary was not recognized", decodeSpec);
        decodeSpec.mFile.deleteOnExit();
        for (final String step : decodeSpec.mDecoderSpec) {
            assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step);
        }
        assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size());
        final FileInputStream inStream = new FileInputStream(decodeSpec.mFile);
        try {
            final ByteBuffer buffer = inStream.getChannel().map(
                    FileChannel.MapMode.READ_ONLY, 0, decodeSpec.mFile.length());
            final FusionDictionary resultDict = BinaryDictInputOutput.readDictionaryBinary(
                    new BinaryDictInputOutput.ByteBufferWrapper(buffer),
                    null /* dict : an optional dictionary to add words to, or null */);
            // Expected value goes first so that a failure reports the values the right way round.
            assertEquals("Dictionary can't be read back correctly", TEST_FREQ,
                    resultDict.findWordInTree(resultDict.mRoot, "foo").getFrequency());
        } finally {
            inStream.close();
        }
    }

    /**
     * Checks that neither a plain garbage file nor a compressed garbage file is mistaken
     * for a binary dictionary.
     */
    public void testGetRawDictFails() throws IOException {
        // Create some 4k file containing garbage
        final File dst = File.createTempFile("testGetRawDict", ".tmp");
        dst.deleteOnExit();
        final OutputStream out = new BufferedOutputStream(new FileOutputStream(dst));
        try {
            writeGarbage(out);
        } finally {
            out.close();
        }

        // Test that a random data file actually fails
        assertNull("Wrongly identified data file",
                BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst));

        final File gzDst = File.createTempFile("testGetRawDict", ".tmp");
        gzDst.deleteOnExit();
        final OutputStream gzOut =
                Compress.getCompressedStream(new BufferedOutputStream(new FileOutputStream(gzDst)));
        try {
            writeGarbage(gzOut);
        } finally {
            gzOut.close();
        }

        // Test that a compressed random data file actually fails
        assertNull("Wrongly identified data file",
                BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(gzDst));
    }

    /**
     * Writes GARBAGE_WORD_COUNT copies of GARBAGE_WORD to the stream, most significant byte
     * first.
     *
     * OutputStream#write(int) only writes the low-order byte of its argument, so each of the
     * four bytes has to be written separately.
     */
    private static void writeGarbage(final OutputStream out) throws IOException {
        for (int i = 0; i < GARBAGE_WORD_COUNT; ++i) {
            out.write(GARBAGE_WORD >>> 24);
            out.write(GARBAGE_WORD >>> 16);
            out.write(GARBAGE_WORD >>> 8);
            out.write(GARBAGE_WORD);
        }
    }
}

View file

@ -14,3 +14,4 @@
# limitations under the License.
java -classpath ${ANDROID_HOST_OUT}/framework/junit.jar:${ANDROID_HOST_OUT}/framework/dicttool_aosp.jar junit.textui.TestRunner com.android.inputmethod.latin.makedict.BinaryDictInputOutputTest
java -classpath ${ANDROID_HOST_OUT}/framework/junit.jar:${ANDROID_HOST_OUT}/framework/dicttool_aosp.jar junit.textui.TestRunner com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtilsTests