am 5b33d197: Add a header command to dicttool.

* commit '5b33d197ba7c9e44847561b99f948d5a11e1ecc6':
  Add a header command to dicttool.
main
Jean Chalard 2014-11-06 15:20:26 +00:00 committed by Android Git Automerger
commit ce50d27ba4
4 changed files with 162 additions and 17 deletions

View File

@ -24,8 +24,13 @@ import android.util.SparseArray;
import com.android.inputmethod.latin.BinaryDictionary; import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.common.CodePointUtils; import com.android.inputmethod.latin.common.CodePointUtils;
import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils;
import com.android.inputmethod.latin.dicttool.Compress;
import com.android.inputmethod.latin.dicttool.Crypt;
import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
@ -67,6 +72,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
private static final SparseArray<List<Integer>> sChainBigrams = new SparseArray<>(); private static final SparseArray<List<Integer>> sChainBigrams = new SparseArray<>();
private static final HashMap<String, List<String>> sShortcuts = new HashMap<>(); private static final HashMap<String, List<String>> sShortcuts = new HashMap<>();
final Random mRandom;
public BinaryDictDecoderEncoderTests() { public BinaryDictDecoderEncoderTests() {
this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS); this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
} }
@ -75,10 +82,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
super(); super();
BinaryDictionaryUtils.setCurrentTimeForTest(0); BinaryDictionaryUtils.setCurrentTimeForTest(0);
Log.e(TAG, "Testing dictionary: seed is " + seed); Log.e(TAG, "Testing dictionary: seed is " + seed);
final Random random = new Random(seed); mRandom = new Random(seed);
sWords.clear(); sWords.clear();
sWordsWithVariousCodePoints.clear(); sWordsWithVariousCodePoints.clear();
generateWords(maxUnigrams, random); generateWords(maxUnigrams, mRandom);
for (int i = 0; i < sWords.size(); ++i) { for (int i = 0; i < sWords.size(); ++i) {
sChainBigrams.put(i, new ArrayList<Integer>()); sChainBigrams.put(i, new ArrayList<Integer>());
@ -96,10 +103,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
sShortcuts.clear(); sShortcuts.clear();
for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) { for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
final int from = Math.abs(random.nextInt()) % sWords.size(); final int from = Math.abs(mRandom.nextInt()) % sWords.size();
sShortcuts.put(sWords.get(from), new ArrayList<String>()); sShortcuts.put(sWords.get(from), new ArrayList<String>());
for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) { for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
final int to = Math.abs(random.nextInt()) % sWords.size(); final int to = Math.abs(mRandom.nextInt()) % sWords.size();
sShortcuts.get(sWords.get(from)).add(sWords.get(to)); sShortcuts.get(sWords.get(from)).add(sWords.get(to));
} }
} }
@ -604,11 +611,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
+ " : " + outputOptions(bufferType, formatOptions)); + " : " + outputOptions(bufferType, formatOptions));
// Test a word that isn't contained within the dictionary. // Test a word that isn't contained within the dictionary.
final Random random = new Random((int)System.currentTimeMillis());
final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE, final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
random); mRandom);
for (int i = 0; i < 1000; ++i) { for (int i = 0; i < 1000; ++i) {
final String word = CodePointUtils.generateWord(random, codePointSet); final String word = CodePointUtils.generateWord(mRandom, codePointSet);
if (sWords.indexOf(word) != -1) continue; if (sWords.indexOf(word) != -1) continue;
checkGetTerminalPosition(dictDecoder, word, false); checkGetTerminalPosition(dictDecoder, word, false);
} }
@ -731,4 +737,61 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
assertTrue(wordSet.isEmpty()); assertTrue(wordSet.isEmpty());
assertTrue(bigramSet.isEmpty()); assertTrue(bigramSet.isEmpty());
} }
/**
 * Writes a dictionary whose header carries a known set of attributes, optionally runs the
 * file through the compress and/or encrypt commands, then decodes it with
 * {@code HeaderReaderProcessor} and checks that both the detected packaging chain and the
 * header attributes match what was written.
 *
 * @param compress whether to compress the written dictionary before decoding it
 * @param crypt whether to encrypt the (possibly compressed) dictionary before decoding it
 * @throws IOException declared for the file operations performed by the helpers called here
 */
public void runTestHeaderReaderProcessorWithOneSpec(final boolean compress, final boolean crypt)
        throws IOException {
    final String dictName = "testHeaderReaderProcessor";
    final String dictVersion = Long.toString(System.currentTimeMillis());
    final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
    final int MAX_NUMBER_OF_OPTIONS_TO_ADD = 5;
    final HashMap<String, String> options = new HashMap<>();
    // Required attributes
    options.put("dictionary", "main:en_US");
    options.put("locale", "en_US");
    options.put("version", Integer.toString(mRandom.nextInt()));
    // Add some random options for test. The bounded nextInt() yields a count in
    // [0, MAX_NUMBER_OF_OPTIONS_TO_ADD]; "nextInt() % n" could be negative, which silently
    // skipped the loop.
    final int numberOfOptionsToAdd = mRandom.nextInt(MAX_NUMBER_OF_OPTIONS_TO_ADD + 1);
    for (int i = 0; i < numberOfOptionsToAdd; ++i) {
        // Consecutive words form a key/value pair: even index is the key, odd is the value.
        // NOTE(review): assumes sWordsWithVariousCodePoints holds at least
        // 2 * MAX_NUMBER_OF_OPTIONS_TO_ADD entries - confirm against generateWords().
        options.put(sWordsWithVariousCodePoints.get(2 * i),
                sWordsWithVariousCodePoints.get(2 * i + 1));
    }
    final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
            new DictionaryOptions(options));
    addUnigrams(sWords.size(), dict, sWords, null);
    File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
            getContext().getCacheDir());
    timeWritingDictToFile(file, dict, formatOptions);

    // Each packaging step reads the previous file and writes a new one; "file" always
    // points at the outermost layer.
    if (compress) {
        final File rawFile = file;
        file = BinaryDictUtils.getDictFile(dictName + "compress", dictVersion, formatOptions,
                getContext().getCacheDir());
        final Compress.Compressor compressCommand = new Compress.Compressor();
        compressCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
        compressCommand.run();
    }
    if (crypt) {
        final File rawFile = file;
        file = BinaryDictUtils.getDictFile(dictName + "crypt", dictVersion, formatOptions,
                getContext().getCacheDir());
        final Crypt.Encrypter cryptCommand = new Crypt.Encrypter();
        cryptCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
        cryptCommand.run();
    }

    final DecoderChainSpec<DictionaryHeader> spec =
            BinaryDictOffdeviceUtils.decodeDictionaryForProcess(file,
                    new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
    assertNotNull("Can't decode a dictionary we just wrote : " + file, spec);
    final DictionaryHeader header = spec.mResult;
    assertEquals("raw" + (crypt ? " > encryption" : "") + (compress ? " > compression" : ""),
            spec.describeChain());
    // Expected value first, so a failure message reports the two maps the right way round.
    assertEquals(options, header.mDictionaryOptions.mAttributes);
}
/**
 * Runs the header round-trip check for three packagings, in order: raw, compressed, and
 * compressed then encrypted.
 */
public void testHeaderReaderProcessor() throws IOException {
    // Each row is { compress, crypt }.
    final boolean[][] packagings = {
        { false, false },
        { true, false },
        { true, true },
    };
    for (final boolean[] packaging : packagings) {
        runTestHeaderReaderProcessorWithOneSpec(packaging[0], packaging[1]);
    }
}
} }

View File

@ -65,7 +65,7 @@ public final class BinaryDictOffdeviceUtils {
}; };
private final int mDecoderSpecIndex; private final int mDecoderSpecIndex;
T mResult; public T mResult;
public DecoderChainSpec() { public DecoderChainSpec() {
mDecoderSpecIndex = 0; mDecoderSpecIndex = 0;
@ -174,12 +174,13 @@ public final class BinaryDictOffdeviceUtils {
} }
final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8) final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8)
+ (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF); + (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF);
if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201) { if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201
throw new UnsupportedFormatException("Only versions 2 and 201 are supported"); && version != FormatSpec.VERSION202) {
throw new UnsupportedFormatException("Only versions 2, 201, 202 are supported");
} }
final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) >> 24) final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) << 24)
+ ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) >> 16) + ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) << 16)
+ ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) >> 8) + ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) << 8)
+ (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF); + (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF);
if (totalHeaderSize > MAX_HEADER_LENGTH) { if (totalHeaderSize > MAX_HEADER_LENGTH) {
throw new UnsupportedFormatException("Header too large"); throw new UnsupportedFormatException("Header too large");
@ -215,11 +216,22 @@ public final class BinaryDictOffdeviceUtils {
@Nonnull DecoderChainSpec spec = new DecoderChainSpec(); @Nonnull DecoderChainSpec spec = new DecoderChainSpec();
while (null != spec) { while (null != spec) {
try { try {
try (final InputStream input = spec.getStream(src)) { final InputStream input = spec.getStream(src);
spec.mResult = processor.process(input); spec.mResult = processor.process(input);
return spec; try {
input.close();
} catch (IOException e) {
// CipherInputStream doesn't like being closed without having read the
// entire stream, for some reason. But we don't want to because it's a waste
// of resources. We really, really don't care about this.
// However on close() CipherInputStream does throw this exception, wrapped
// in an IOException so we need to catch it.
if (!(e.getCause() instanceof javax.crypto.BadPaddingException)) {
throw e;
} }
} catch (IOException | UnsupportedFormatException e) { }
return spec;
} catch (IOException | UnsupportedFormatException | ArrayIndexOutOfBoundsException e) {
// If the format is not the right one for this file, the processor will throw one // If the format is not the right one for this file, the processor will throw one
// of these exceptions. In our case, that means we should try the next spec, // of these exceptions. In our case, that means we should try the next spec,
// since it may still be at another format we haven't tried yet. // since it may still be at another format we haven't tried yet.

View File

@ -20,6 +20,7 @@ public class CommandList {
public static void populate() { public static void populate() {
// TODO: Move some commands to native code. // TODO: Move some commands to native code.
Dicttool.addCommand("info", Info.class); Dicttool.addCommand("info", Info.class);
Dicttool.addCommand("header", Header.class);
Dicttool.addCommand("diff", Diff.class); Dicttool.addCommand("diff", Diff.class);
Dicttool.addCommand("compress", Compress.Compressor.class); Dicttool.addCommand("compress", Compress.Compressor.class);
Dicttool.addCommand("uncompress", Compress.Uncompressor.class); Dicttool.addCommand("uncompress", Compress.Uncompressor.class);

View File

@ -0,0 +1,69 @@
/**
* Copyright (C) 2014 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.BinaryDictionary;
import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
import com.android.inputmethod.latin.makedict.DictionaryHeader;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
import java.io.File;
import java.util.Arrays;
import java.util.Locale;
/**
 * Dicttool command that prints the header of a dictionary file.
 *
 * <p>Usage: {@code header [-p] <filename>}. When the first argument is {@code -p}, the
 * header attributes are formatted in "plumbing" mode, as implemented by
 * {@code DictionaryOptions#toString(int, boolean)}.
 */
public class Header extends Dicttool.Command {
    public static final String COMMAND = "header";

    public Header() {
    }

    @Override
    public String getHelp() {
        return COMMAND + " [-p] <filename>: prints the header contents of a dictionary file";
    }

    /**
     * Decodes the dictionary named on the command line and prints its path, size, packaging
     * chain and header attributes to standard output.
     *
     * @throws UnsupportedFormatException if no decoder chain could read the file
     * @throws RuntimeException if no filename argument was supplied
     */
    @Override
    public void run() throws UnsupportedFormatException {
        // Locate the filename with a local index instead of rewriting mArgs, so that running
        // the same command instance again sees the same arguments (including "-p").
        final boolean plumbing = mArgs.length > 0 && "-p".equals(mArgs[0]);
        final int filenameIndex = plumbing ? 1 : 0;
        if (mArgs.length <= filenameIndex) {
            throw new RuntimeException("Not enough arguments for command " + COMMAND);
        }
        final String filename = mArgs[filenameIndex];
        final File dictFile = new File(filename);
        final DecoderChainSpec<DictionaryHeader> spec =
                BinaryDictOffdeviceUtils.decodeDictionaryForProcess(dictFile,
                        new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
        // A null spec means none of the decoder chains that were tried could process the file.
        if (null == spec) {
            throw new UnsupportedFormatException(filename
                    + " doesn't seem to be a valid version 2 dictionary file");
        }

        final DictionaryHeader header = spec.mResult;
        System.out.println("Dictionary : " + dictFile.getAbsolutePath());
        System.out.println("Size : " + dictFile.length() + " bytes");
        System.out.println("Format : Binary dictionary format");
        System.out.println("Packaging : " + spec.describeChain());
        System.out.println("Header attributes :");
        System.out.print(header.mDictionaryOptions.toString(2 /* indentCount */, plumbing));
    }
}