Add auto detection and decoding of dictionary files. (A2)
Bug: 7388852 Change-Id: I25e755fc15f5b383acc046f668e9681efa4f0c2f
This commit is contained in:
parent
77fe603a3d
commit
b3c98901c5
6 changed files with 206 additions and 14 deletions
|
@ -1698,6 +1698,14 @@ public final class BinaryDictInputOutput {
|
||||||
return newDict;
|
return newDict;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method to pass a file name instead of a File object to isBinaryDictionary.
|
||||||
|
*/
|
||||||
|
public static boolean isBinaryDictionary(final String filename) {
|
||||||
|
final File file = new File(filename);
|
||||||
|
return isBinaryDictionary(file);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Basic test to find out whether the file is a binary dictionary or not.
|
* Basic test to find out whether the file is a binary dictionary or not.
|
||||||
*
|
*
|
||||||
|
@ -1706,10 +1714,9 @@ public final class BinaryDictInputOutput {
|
||||||
* @param filename The name of the file to test.
|
* @param filename The name of the file to test.
|
||||||
* @return true if it's a binary dictionary, false otherwise
|
* @return true if it's a binary dictionary, false otherwise
|
||||||
*/
|
*/
|
||||||
public static boolean isBinaryDictionary(final String filename) {
|
public static boolean isBinaryDictionary(final File file) {
|
||||||
FileInputStream inStream = null;
|
FileInputStream inStream = null;
|
||||||
try {
|
try {
|
||||||
final File file = new File(filename);
|
|
||||||
inStream = new FileInputStream(file);
|
inStream = new FileInputStream(file);
|
||||||
final ByteBuffer buffer = inStream.getChannel().map(
|
final ByteBuffer buffer = inStream.getChannel().map(
|
||||||
FileChannel.MapMode.READ_ONLY, 0, file.length());
|
FileChannel.MapMode.READ_ONLY, 0, file.length());
|
||||||
|
|
|
@ -16,6 +16,7 @@
|
||||||
|
|
||||||
package com.android.inputmethod.latin.makedict;
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
import com.android.inputmethod.latin.Constants;
|
import com.android.inputmethod.latin.Constants;
|
||||||
|
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
@ -141,6 +142,11 @@ public final class FusionDictionary implements Iterable<Word> {
|
||||||
return NOT_A_TERMINAL != mFrequency;
|
return NOT_A_TERMINAL != mFrequency;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Returns the raw value of {@code mFrequency}, exposed for tests.
 *
 * NOTE(review): the neighboring check compares mFrequency against NOT_A_TERMINAL, so this
 * presumably returns NOT_A_TERMINAL when no frequency is set - confirm against the class.
 */
@UsedForTesting
|
||||||
|
public int getFrequency() {
|
||||||
|
return mFrequency;
|
||||||
|
}
|
||||||
|
|
||||||
public boolean hasSeveralChars() {
|
public boolean hasSeveralChars() {
|
||||||
assert(mChars.length > 0);
|
assert(mChars.length > 0);
|
||||||
return 1 < mChars.length;
|
return 1 < mChars.length;
|
||||||
|
|
|
@ -16,19 +16,42 @@
|
||||||
|
|
||||||
package com.android.inputmethod.latin.dicttool;
|
package com.android.inputmethod.latin.dicttool;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
|
import java.io.BufferedOutputStream;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
import java.io.BufferedInputStream;
|
import java.io.BufferedInputStream;
|
||||||
import java.io.BufferedOutputStream;
|
import java.io.BufferedOutputStream;
|
||||||
import java.io.InputStream;
|
import java.io.InputStream;
|
||||||
import java.io.OutputStream;
|
import java.io.OutputStream;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Class grouping utilities for offline dictionary making.
|
* Class grouping utilities for offline dictionary making.
|
||||||
*
|
*
|
||||||
* Those should not be used on-device, essentially because they are quite
|
* Those should not be used on-device, essentially because they are quite
|
||||||
* liberal about I/O and performance.
|
* liberal about I/O and performance.
|
||||||
*/
|
*/
|
||||||
public class BinaryDictOffdeviceUtils {
|
public final class BinaryDictOffdeviceUtils {
|
||||||
|
// Prefix and suffix are arbitrary, the values do not really matter
|
||||||
|
private final static String PREFIX = "dicttool";
|
||||||
|
private final static String SUFFIX = ".tmp";
|
||||||
|
|
||||||
|
public final static String COMPRESSION = "compression";
|
||||||
|
|
||||||
|
public static class DecoderChainSpec {
|
||||||
|
ArrayList<String> mDecoderSpec = new ArrayList<String>();
|
||||||
|
File mFile;
|
||||||
|
public DecoderChainSpec addStep(final String stepDescription) {
|
||||||
|
mDecoderSpec.add(stepDescription);
|
||||||
|
return this;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public static void copy(final InputStream input, final OutputStream output) throws IOException {
|
public static void copy(final InputStream input, final OutputStream output) throws IOException {
|
||||||
final byte[] buffer = new byte[1000];
|
final byte[] buffer = new byte[1000];
|
||||||
final BufferedInputStream in = new BufferedInputStream(input);
|
final BufferedInputStream in = new BufferedInputStream(input);
|
||||||
|
@ -38,4 +61,51 @@ public class BinaryDictOffdeviceUtils {
|
||||||
in.close();
|
in.close();
|
||||||
out.close();
|
out.close();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Returns a decrypted/uncompressed binary dictionary.
|
||||||
|
*
|
||||||
|
* This will decrypt/uncompress any number of times as necessary until it finds the binary
|
||||||
|
* dictionary signature, and copy the decoded file to a temporary place.
|
||||||
|
* If this is not a binary dictionary, the method returns null.
|
||||||
|
*/
|
||||||
|
public static DecoderChainSpec getRawBinaryDictionaryOrNull(final File src) {
|
||||||
|
return getRawBinaryDictionaryOrNullInternal(new DecoderChainSpec(), src);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static DecoderChainSpec getRawBinaryDictionaryOrNullInternal(
|
||||||
|
final DecoderChainSpec spec, final File src) {
|
||||||
|
// TODO: arrange for the intermediary files to be deleted
|
||||||
|
if (BinaryDictInputOutput.isBinaryDictionary(src)) {
|
||||||
|
spec.mFile = src;
|
||||||
|
return spec;
|
||||||
|
}
|
||||||
|
// It's not a raw dictionary - try to see if it's compressed.
|
||||||
|
final File uncompressedFile = tryGetUncompressedFile(src);
|
||||||
|
if (null != uncompressedFile) {
|
||||||
|
final DecoderChainSpec newSpec =
|
||||||
|
getRawBinaryDictionaryOrNullInternal(spec, uncompressedFile);
|
||||||
|
if (null == newSpec) return null;
|
||||||
|
return newSpec.addStep(COMPRESSION);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Try to uncompress the file passed as an argument.
|
||||||
|
*
|
||||||
|
* If the file can be uncompressed, the uncompressed version is returned. Otherwise, null
|
||||||
|
* is returned.
|
||||||
|
*/
|
||||||
|
private static File tryGetUncompressedFile(final File src) {
|
||||||
|
try {
|
||||||
|
final File dst = File.createTempFile(PREFIX, SUFFIX);
|
||||||
|
final FileOutputStream dstStream = new FileOutputStream(dst);
|
||||||
|
copy(Compress.getUncompressedStream(new BufferedInputStream(new FileInputStream(src))),
|
||||||
|
new BufferedOutputStream(dstStream)); // #copy() closes the streams
|
||||||
|
return dst;
|
||||||
|
} catch (IOException e) {
|
||||||
|
// Could not uncompress the file: presumably the file is simply not a compressed file
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,6 +16,8 @@
|
||||||
|
|
||||||
package com.android.inputmethod.latin.dicttool;
|
package com.android.inputmethod.latin.dicttool;
|
||||||
|
|
||||||
|
import java.io.BufferedInputStream;
|
||||||
|
import java.io.BufferedOutputStream;
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.FileOutputStream;
|
import java.io.FileOutputStream;
|
||||||
|
@ -27,12 +29,12 @@ import java.util.zip.GZIPOutputStream;
|
||||||
|
|
||||||
public class Compress {
|
public class Compress {
|
||||||
|
|
||||||
private static OutputStream getCompressedStream(final OutputStream out)
|
public static OutputStream getCompressedStream(final OutputStream out)
|
||||||
throws java.io.IOException {
|
throws java.io.IOException {
|
||||||
return new GZIPOutputStream(out);
|
return new GZIPOutputStream(out);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static InputStream getUncompressedStream(final InputStream in) throws IOException {
|
public static InputStream getUncompressedStream(final InputStream in) throws IOException {
|
||||||
return new GZIPInputStream(in);
|
return new GZIPInputStream(in);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -55,9 +57,9 @@ public class Compress {
|
||||||
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
|
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
|
||||||
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
|
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
|
||||||
final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
|
final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
|
||||||
: new FileInputStream(new File(inFilename));
|
: new BufferedInputStream(new FileInputStream(new File(inFilename)));
|
||||||
final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
|
final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
|
||||||
: new FileOutputStream(new File(outFilename));
|
: new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
|
||||||
BinaryDictOffdeviceUtils.copy(input, new GZIPOutputStream(output));
|
BinaryDictOffdeviceUtils.copy(input, new GZIPOutputStream(output));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -81,9 +83,9 @@ public class Compress {
|
||||||
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
|
final String inFilename = mArgs.length >= 1 ? mArgs[0] : STDIN_OR_STDOUT;
|
||||||
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
|
final String outFilename = mArgs.length >= 2 ? mArgs[1] : STDIN_OR_STDOUT;
|
||||||
final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
|
final InputStream input = inFilename.equals(STDIN_OR_STDOUT) ? System.in
|
||||||
: new FileInputStream(new File(inFilename));
|
: new BufferedInputStream(new FileInputStream(new File(inFilename)));
|
||||||
final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
|
final OutputStream output = outFilename.equals(STDIN_OR_STDOUT) ? System.out
|
||||||
: new FileOutputStream(new File(outFilename));
|
: new BufferedOutputStream(new FileOutputStream(new File(outFilename)));
|
||||||
BinaryDictOffdeviceUtils.copy(new GZIPInputStream(input), output);
|
BinaryDictOffdeviceUtils.copy(new GZIPInputStream(input), output);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,106 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2012 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.android.inputmethod.latin.dicttool;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
|
||||||
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||||
|
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||||
|
|
||||||
|
import junit.framework.TestCase;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.BufferedOutputStream;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileOutputStream;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.channels.FileChannel;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.HashMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Unit tests for BinaryDictOffdeviceUtilsTests
|
||||||
|
*/
|
||||||
|
public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
||||||
|
private static final int TEST_FREQ = 37; // Some arbitrary value unlikely to happen by chance
|
||||||
|
|
||||||
|
public void testGetRawDictWorks() throws IOException, UnsupportedFormatException {
|
||||||
|
// Create a thrice-compressed dictionary file.
|
||||||
|
final FusionDictionary dict = new FusionDictionary(new Node(),
|
||||||
|
new DictionaryOptions(new HashMap<String, String>(),
|
||||||
|
false /* germanUmlautProcessing */, false /* frenchLigatureProcessing */));
|
||||||
|
dict.add("foo", TEST_FREQ, null, false /* isNotAWord */);
|
||||||
|
dict.add("fta", 1, null, false /* isNotAWord */);
|
||||||
|
dict.add("ftb", 1, null, false /* isNotAWord */);
|
||||||
|
dict.add("bar", 1, null, false /* isNotAWord */);
|
||||||
|
dict.add("fool", 1, null, false /* isNotAWord */);
|
||||||
|
|
||||||
|
final File dst = File.createTempFile("testGetRawDict", ".tmp");
|
||||||
|
final OutputStream out = Compress.getCompressedStream(
|
||||||
|
Compress.getCompressedStream(
|
||||||
|
Compress.getCompressedStream(
|
||||||
|
new BufferedOutputStream(new FileOutputStream(dst)))));
|
||||||
|
|
||||||
|
BinaryDictInputOutput.writeDictionaryBinary(out, dict, new FormatOptions(2, false));
|
||||||
|
|
||||||
|
// Test for an actually compressed dictionary and its contents
|
||||||
|
final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec =
|
||||||
|
BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst);
|
||||||
|
for (final String step : decodeSpec.mDecoderSpec) {
|
||||||
|
assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step);
|
||||||
|
}
|
||||||
|
assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size());
|
||||||
|
final FileInputStream inStream = new FileInputStream(decodeSpec.mFile);
|
||||||
|
final ByteBuffer buffer = inStream.getChannel().map(
|
||||||
|
FileChannel.MapMode.READ_ONLY, 0, decodeSpec.mFile.length());
|
||||||
|
final FusionDictionary resultDict = BinaryDictInputOutput.readDictionaryBinary(
|
||||||
|
new BinaryDictInputOutput.ByteBufferWrapper(buffer),
|
||||||
|
null /* dict : an optional dictionary to add words to, or null */);
|
||||||
|
assertEquals("Dictionary can't be read back correctly",
|
||||||
|
resultDict.findWordInTree(resultDict.mRoot, "foo").getFrequency(), TEST_FREQ);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testGetRawDictFails() throws IOException {
|
||||||
|
// Randomly create some 4k file containing garbage
|
||||||
|
final File dst = File.createTempFile("testGetRawDict", ".tmp");
|
||||||
|
final OutputStream out = new BufferedOutputStream(new FileOutputStream(dst));
|
||||||
|
for (int i = 0; i < 1024; ++i) {
|
||||||
|
out.write(0x12345678);
|
||||||
|
}
|
||||||
|
out.close();
|
||||||
|
|
||||||
|
// Test that a random data file actually fails
|
||||||
|
assertNull("Wrongly identified data file",
|
||||||
|
BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(dst));
|
||||||
|
|
||||||
|
final File gzDst = File.createTempFile("testGetRawDict", ".tmp");
|
||||||
|
final OutputStream gzOut =
|
||||||
|
Compress.getCompressedStream(new BufferedOutputStream(new FileOutputStream(gzDst)));
|
||||||
|
for (int i = 0; i < 1024; ++i) {
|
||||||
|
gzOut.write(0x12345678);
|
||||||
|
}
|
||||||
|
gzOut.close();
|
||||||
|
|
||||||
|
// Test that a compressed random data file actually fails
|
||||||
|
assertNull("Wrongly identified data file",
|
||||||
|
BinaryDictOffdeviceUtils.getRawBinaryDictionaryOrNull(gzDst));
|
||||||
|
}
|
||||||
|
}
|
|
@ -14,3 +14,4 @@
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
|
|
||||||
java -classpath ${ANDROID_HOST_OUT}/framework/junit.jar:${ANDROID_HOST_OUT}/framework/dicttool_aosp.jar junit.textui.TestRunner com.android.inputmethod.latin.makedict.BinaryDictInputOutputTest
|
java -classpath ${ANDROID_HOST_OUT}/framework/junit.jar:${ANDROID_HOST_OUT}/framework/dicttool_aosp.jar junit.textui.TestRunner com.android.inputmethod.latin.makedict.BinaryDictInputOutputTest
|
||||||
|
java -classpath ${ANDROID_HOST_OUT}/framework/junit.jar:${ANDROID_HOST_OUT}/framework/dicttool_aosp.jar junit.textui.TestRunner com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtilsTests
|
||||||
|
|
Loading…
Reference in a new issue