Add auto detection and decoding of dictionary files. (A2)
Bug: 7388852 Change-Id: I25e755fc15f5b383acc046f668e9681efa4f0c2f
This commit is contained in:
parent
77fe603a3d
commit
b3c98901c5
6 changed files with 206 additions and 14 deletions
|
@ -1698,6 +1698,14 @@ public final class BinaryDictInputOutput {
|
|||
return newDict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to pass a file name instead of a File object to isBinaryDictionary.
|
||||
*/
|
||||
public static boolean isBinaryDictionary(final String filename) {
|
||||
final File file = new File(filename);
|
||||
return isBinaryDictionary(file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Basic test to find out whether the file is a binary dictionary or not.
|
||||
*
|
||||
|
@ -1706,10 +1714,9 @@ public final class BinaryDictInputOutput {
|
|||
* @param file The file to test.
|
||||
* @return true if it's a binary dictionary, false otherwise
|
||||
*/
|
||||
public static boolean isBinaryDictionary(final String filename) {
|
||||
public static boolean isBinaryDictionary(final File file) {
|
||||
FileInputStream inStream = null;
|
||||
try {
|
||||
final File file = new File(filename);
|
||||
inStream = new FileInputStream(file);
|
||||
final ByteBuffer buffer = inStream.getChannel().map(
|
||||
FileChannel.MapMode.READ_ONLY, 0, file.length());
|
||||
|
|
|
@ -16,6 +16,7 @@
|
|||
|
||||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.latin.Constants;
|
||||
|
||||
import java.util.ArrayList;
|
||||
|
@ -141,6 +142,11 @@ public final class FusionDictionary implements Iterable<Word> {
|
|||
return NOT_A_TERMINAL != mFrequency;
|
||||
}
|
||||
|
||||
/**
 * Returns the value of mFrequency exactly as stored, without any interpretation.
 *
 * NOTE(review): presumably this can be NOT_A_TERMINAL, since the neighbouring terminal check
 * compares mFrequency against that constant - confirm before relying on the value.
 *
 * @return the stored frequency.
 */
@UsedForTesting
|
||||
public int getFrequency() {
|
||||
return mFrequency;
|
||||
}
|
||||
|
||||
public boolean hasSeveralChars() {
|
||||
assert(mChars.length > 0);
|
||||
return 1 < mChars.length;
|
||||
|
|
|
@ -16,19 +16,42 @@
|
|||
|
||||
package com.android.inputmethod.latin.dicttool;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.InputStream;
|
||||
import java.io.OutputStream;
|
||||
import java.util.ArrayList;
|
||||
|
||||
/**
|
||||
* Class grouping utilities for offline dictionary making.
|
||||
*
|
||||
* Those should not be used on-device, essentially because they are quite
|
||||
* liberal about I/O and performance.
|
||||
*/
|
||||
public class BinaryDictOffdeviceUtils {
|
||||
* Class grouping utilities for offline dictionary making.
|
||||
*
|
||||
* Those should not be used on-device, essentially because they are quite
|
||||
* liberal about I/O and performance.
|
||||
*/
|
||||
public final class BinaryDictOffdeviceUtils {
|
||||
// Prefix and suffix are arbitrary, the values do not really matter
|
||||
private final static String PREFIX = "dicttool";
|
||||
private final static String SUFFIX = ".tmp";
|
||||
|
||||
public final static String COMPRESSION = "compression";
|
||||
|
||||
/**
 * Result holder for getRawBinaryDictionaryOrNull: the file that was recognized as a raw
 * binary dictionary, plus the list of decoding steps recorded while getting to it.
 */
public static class DecoderChainSpec {
|
||||
// Descriptions of the decoding steps, in the order addStep() was called.
ArrayList<String> mDecoderSpec = new ArrayList<String>();
|
||||
// The file identified as a raw binary dictionary; stays null until one is found.
File mFile;
|
||||
/**
 * Appends a step description to the decoder chain.
 *
 * @param stepDescription the description of the step to record.
 * @return this object, so that calls can be chained.
 */
public DecoderChainSpec addStep(final String stepDescription) {
|
||||
mDecoderSpec.add(stepDescription);
|
||||
return this;
|
||||
}
|
||||
}
|
||||
|
||||
public static void copy(final InputStream input, final OutputStream output) throws IOException {
|
||||
final byte[] buffer = new byte[1000];
|
||||
final BufferedInputStream in = new BufferedInputStream(input);
|
||||
|
@ -38,4 +61,51 @@ public class BinaryDictOffdeviceUtils {
|
|||
in.close();
|
||||
out.close();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns a decrypted/uncompressed binary dictionary.
|
||||
*
|
||||
* This will decrypt/uncompress any number of times as necessary until it finds the binary
|
||||
* dictionary signature, and copy the decoded file to a temporary place.
|
||||
* If this is not a binary dictionary, the method returns null.
|
||||
*/
|
||||
public static DecoderChainSpec getRawBinaryDictionaryOrNull(final File src) {
|
||||
return getRawBinaryDictionaryOrNullInternal(new DecoderChainSpec(), src);
|
||||
}
|
||||
|
||||
private static DecoderChainSpec getRawBinaryDictionaryOrNullInternal(
|
||||
final DecoderChainSpec spec, final File src) {
|
||||
// TODO: arrange for the intermediary files to be deleted
|
||||
if (BinaryDictInputOutput.isBinaryDictionary(src)) {
|
||||
spec.mFile = src;
|
||||
return spec;
|
||||
}
|
||||
// It's not a raw dictionary - try to see if it's compressed.
|
||||
final File uncompressedFile = tryGetUncompressedFile(src);
|
||||
if (null != uncompressedFile) {
|
||||
final DecoderChainSpec newSpec =
|
||||
getRawBinaryDictionaryOrNullInternal(spec, uncompressedFile);
|
||||
if (null == newSpec) return null;
|
||||
return newSpec.addStep(COMPRESSION);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
/* Try to uncompress the file passed as an argument.
|
||||
*
|
||||
* If the file can be uncompressed, the uncompressed version is returned. Otherwise, null
|
||||
* is returned.
|
||||
*/
|
||||
private static File tryGetUncompressedFile(final File src) {
|
||||
try {
|
||||
final File dst = File.createTempFile(PREFIX, SUFFIX);
|
||||
final FileOutputStream dstStream = new FileOutputStream(dst);
|
||||
copy(Compress.getUncompressedStream(new BufferedInputStream(new FileInputStream(src))),
|
||||
new BufferedOutputStream(dstStream)); // #copy() closes the streams
|
||||
return dst;
|
||||
} catch (IOException e) {
|
||||
// Could not uncompress the file: presumably the file is simply not a compressed file
|
||||
return null;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -16,6 +16,8 @@
|
|||
|
||||
package com.android.inputmethod.latin.dicttool;
|
||||
|
||||
import java.io.BufferedInputStream;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
|
@ -27,12 +29,12 @@ import java.util.zip.GZIPOutputStream;
|
|||
|
||||
public class Compress {
|
||||
|
||||
private static OutputStream getCompressedStream(final OutputStream out)
|
||||
public static OutputStream getCompressedStream(final OutputStream out)
|
||||
throws java.io.IOException {
|
||||
return new GZIPOutputStream(out);
|
||||
}
|
||||
|
||||
private static InputStream getUncompressedStream(final InputStream in) throws IOException {
|
||||
public static InputStream getUncompressedStream(final InputStream in) throws IOException {
|
||||
return new GZIPInputStream(in);
|
||||
}
|
||||
|
||||
|
@ -55,9 +57,9 @@ public class Compress {
|
|||
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
|
||||
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
|
||||
final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
|
||||
: new FileInputStream(new File(inFilename));
|
||||
: new BufferedInputStream(new FileInputStream(new File(inFilename)));
|
||||
final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
|
||||
: new FileOutputStream(new File(outFilename));
|
||||
: new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
|
||||
BinaryDictOffdeviceUtils.copy(input, new GZIPOutputStream(output));
|
||||
}
|
||||
}
|
||||
|
@ -81,9 +83,9 @@ public class Compress {
|
|||
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
|
||||
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
|
||||
final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
|
||||
: new FileInputStream(new File(inFilename));
|
||||
: new BufferedInputStream(new FileInputStream(new File(inFilename)));
|
||||
final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
|
||||
: new FileOutputStream(new File(outFilename));
|
||||
: new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
|
||||
BinaryDictOffdeviceUtils.copy(new GZIPInputStream(input), output);
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,106 @@
|
|||
/*
|
||||
* Copyright (C) 2012 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.dicttool;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||
|
||||
import junit.framework.TestCase;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.BufferedOutputStream;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileOutputStream;
|
||||
import java.io.IOException;
|
||||
import java.io.OutputStream;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
|
||||
/**
|
||||
* Unit tests for BinaryDictOffdeviceUtilsTests
|
||||
*/
|
||||
public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
||||
private static final int TEST_FREQ = 37; // Some arbitrary value unlikely to happen by chance
|
||||
|
||||
public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
|
||||
// Create a thrice-compressed dictionary file.
|
||||
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||
new DictionaryOptions(new HashMap<String, String>(),
|
||||
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
||||
dict.add("foo", TEST_FREQ, null, false /* isNotAWord */);
|
||||
dict.add("fta", 1, null, false /* isNotAWord */);
|
||||
dict.add("ftb", 1, null, false /* isNotAWord */);
|
||||
dict.add("bar", 1, null, false /* isNotAWord */);
|
||||
dict.add("fool", 1, null, false /* isNotAWord */);
|
||||
|
||||
final File dst = File.createTempFile("testGetRawDict", ".tmp");
|
||||
final OutputStream out = Compress.getCompressedStream(
|
||||
Compress.getCompressedStream(
|
||||
Compress.getCompressedStream(
|
||||
new BufferedOutputStream(new FileOutputStream(dst)))));
|
||||
|
||||
BinaryDictInputOutput.writeDictionaryBinary(out, dict, new FormatOptions(2, false));
|
||||
|
||||
// Test for an actually compressed dictionary and its contents
|
||||
final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
|
||||
BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst);
|
||||
for (final String step : decodeSpec.mDecoderSpec) {
|
||||
assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step);
|
||||
}
|
||||
assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size());
|
||||
final FileInputStream inStream = new FileInputStream(decodeSpec.mFile);
|
||||
final ByteBuffer buffer = inStream.getChannel().map(
|
||||
FileChannel.MapMode.READ_ONLY, 0, decodeSpec.mFile.length());
|
||||
final FusionDictionary resultDict = BinaryDictInputOutput.readDictionaryBinary(
|
||||
new BinaryDictInputOutput.ByteBufferWrapper(buffer),
|
||||
null /* dict : an optional dictionary to add words to, or null */);
|
||||
assertEquals("Dictionary can't be read back correctly",
|
||||
resultDict.findWordInTree(resultDict.mRoot, "foo").getFrequency(), TEST_FREQ);
|
||||
}
|
||||
|
||||
public void testGetRawDictFails() throws IOException {
|
||||
// Randomly create some 4k file containing garbage
|
||||
final File dst = File.createTempFile("testGetRawDict", ".tmp");
|
||||
final OutputStream out = new BufferedOutputStream(new FileOutputStream(dst));
|
||||
for (int i = 0; i < 1024; ++i) {
|
||||
out.write(0x12345678);
|
||||
}
|
||||
out.close();
|
||||
|
||||
// Test that a random data file actually fails
|
||||
assertNull("Wrongly identified data file",
|
||||
BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst));
|
||||
|
||||
final File gzDst = File.createTempFile("testGetRawDict", ".tmp");
|
||||
final OutputStream gzOut =
|
||||
Compress.getCompressedStream(new BufferedOutputStream(new FileOutputStream(gzDst)));
|
||||
for (int i = 0; i < 1024; ++i) {
|
||||
gzOut.write(0x12345678);
|
||||
}
|
||||
gzOut.close();
|
||||
|
||||
// Test that a compressed random data file actually fails
|
||||
assertNull("Wrongly identified data file",
|
||||
BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(gzDst));
|
||||
}
|
||||
}
|
|
@ -14,3 +14,4 @@
|
|||
# limitations under the License.
|
||||
|
||||
java -classpath ${ANDROID_HOST_OUT}/framework/junit.jar:${ANDROID_HOST_OUT}/framework/dicttool_aosp.jar junit.textui.TestRunner com.android.inputmethod.latin.makedict.BinaryDictInputOutputTest
|
||||
java -classpath ${ANDROID_HOST_OUT}/framework/junit.jar:${ANDROID_HOST_OUT}/framework/dicttool_aosp.jar junit.textui.TestRunner com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtilsTests
|
||||
|
|
Loading…
Reference in a new issue