am 5b33d197: Add a header command to dicttool.
* commit '5b33d197ba7c9e44847561b99f948d5a11e1ecc6': Add a header command to dicttool.main
commit
ce50d27ba4
|
@ -24,8 +24,13 @@ import android.util.SparseArray;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.BinaryDictionary;
|
import com.android.inputmethod.latin.BinaryDictionary;
|
||||||
import com.android.inputmethod.latin.common.CodePointUtils;
|
import com.android.inputmethod.latin.common.CodePointUtils;
|
||||||
|
import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils;
|
||||||
|
import com.android.inputmethod.latin.dicttool.Compress;
|
||||||
|
import com.android.inputmethod.latin.dicttool.Crypt;
|
||||||
|
import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
|
import com.android.inputmethod.latin.makedict.FormatSpec.DictionaryOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNode;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
|
@ -67,6 +72,8 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
private static final SparseArray<List<Integer>> sChainBigrams = new SparseArray<>();
|
private static final SparseArray<List<Integer>> sChainBigrams = new SparseArray<>();
|
||||||
private static final HashMap<String, List<String>> sShortcuts = new HashMap<>();
|
private static final HashMap<String, List<String>> sShortcuts = new HashMap<>();
|
||||||
|
|
||||||
|
final Random mRandom;
|
||||||
|
|
||||||
public BinaryDictDecoderEncoderTests() {
|
public BinaryDictDecoderEncoderTests() {
|
||||||
this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
|
this(System.currentTimeMillis(), DEFAULT_MAX_UNIGRAMS);
|
||||||
}
|
}
|
||||||
|
@ -75,10 +82,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
super();
|
super();
|
||||||
BinaryDictionaryUtils.setCurrentTimeForTest(0);
|
BinaryDictionaryUtils.setCurrentTimeForTest(0);
|
||||||
Log.e(TAG, "Testing dictionary: seed is " + seed);
|
Log.e(TAG, "Testing dictionary: seed is " + seed);
|
||||||
final Random random = new Random(seed);
|
mRandom = new Random(seed);
|
||||||
sWords.clear();
|
sWords.clear();
|
||||||
sWordsWithVariousCodePoints.clear();
|
sWordsWithVariousCodePoints.clear();
|
||||||
generateWords(maxUnigrams, random);
|
generateWords(maxUnigrams, mRandom);
|
||||||
|
|
||||||
for (int i = 0; i < sWords.size(); ++i) {
|
for (int i = 0; i < sWords.size(); ++i) {
|
||||||
sChainBigrams.put(i, new ArrayList<Integer>());
|
sChainBigrams.put(i, new ArrayList<Integer>());
|
||||||
|
@ -96,10 +103,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
|
|
||||||
sShortcuts.clear();
|
sShortcuts.clear();
|
||||||
for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
|
for (int i = 0; i < NUM_OF_NODES_HAVING_SHORTCUTS; ++i) {
|
||||||
final int from = Math.abs(random.nextInt()) % sWords.size();
|
final int from = Math.abs(mRandom.nextInt()) % sWords.size();
|
||||||
sShortcuts.put(sWords.get(from), new ArrayList<String>());
|
sShortcuts.put(sWords.get(from), new ArrayList<String>());
|
||||||
for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
|
for (int j = 0; j < NUM_OF_SHORTCUTS; ++j) {
|
||||||
final int to = Math.abs(random.nextInt()) % sWords.size();
|
final int to = Math.abs(mRandom.nextInt()) % sWords.size();
|
||||||
sShortcuts.get(sWords.get(from)).add(sWords.get(to));
|
sShortcuts.get(sWords.get(from)).add(sWords.get(to));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -604,11 +611,10 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
+ " : " + outputOptions(bufferType, formatOptions));
|
+ " : " + outputOptions(bufferType, formatOptions));
|
||||||
|
|
||||||
// Test a word that isn't contained within the dictionary.
|
// Test a word that isn't contained within the dictionary.
|
||||||
final Random random = new Random((int)System.currentTimeMillis());
|
|
||||||
final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
|
final int[] codePointSet = CodePointUtils.generateCodePointSet(DEFAULT_CODE_POINT_SET_SIZE,
|
||||||
random);
|
mRandom);
|
||||||
for (int i = 0; i < 1000; ++i) {
|
for (int i = 0; i < 1000; ++i) {
|
||||||
final String word = CodePointUtils.generateWord(random, codePointSet);
|
final String word = CodePointUtils.generateWord(mRandom, codePointSet);
|
||||||
if (sWords.indexOf(word) != -1) continue;
|
if (sWords.indexOf(word) != -1) continue;
|
||||||
checkGetTerminalPosition(dictDecoder, word, false);
|
checkGetTerminalPosition(dictDecoder, word, false);
|
||||||
}
|
}
|
||||||
|
@ -731,4 +737,61 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
assertTrue(wordSet.isEmpty());
|
assertTrue(wordSet.isEmpty());
|
||||||
assertTrue(bigramSet.isEmpty());
|
assertTrue(bigramSet.isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void runTestHeaderReaderProcessorWithOneSpec(final boolean compress, final boolean crypt)
|
||||||
|
throws IOException {
|
||||||
|
final String dictName = "testHeaderReaderProcessor";
|
||||||
|
final String dictVersion = Long.toString(System.currentTimeMillis());
|
||||||
|
final FormatOptions formatOptions = BinaryDictUtils.STATIC_OPTIONS;
|
||||||
|
final int MAX_NUMBER_OF_OPTIONS_TO_ADD = 5;
|
||||||
|
final HashMap<String, String> options = new HashMap<>();
|
||||||
|
// Required attributes
|
||||||
|
options.put("dictionary", "main:en_US");
|
||||||
|
options.put("locale", "en_US");
|
||||||
|
options.put("version", Integer.toString(mRandom.nextInt()));
|
||||||
|
// Add some random options for test
|
||||||
|
final int numberOfOptionsToAdd = mRandom.nextInt() % (MAX_NUMBER_OF_OPTIONS_TO_ADD + 1);
|
||||||
|
for (int i = 0; i < numberOfOptionsToAdd; ++i) {
|
||||||
|
options.put(sWordsWithVariousCodePoints.get(2 * i),
|
||||||
|
sWordsWithVariousCodePoints.get(2 * 1 + 1));
|
||||||
|
}
|
||||||
|
final FusionDictionary dict = new FusionDictionary(new PtNodeArray(),
|
||||||
|
new DictionaryOptions(options));
|
||||||
|
addUnigrams(sWords.size(), dict, sWords, null);
|
||||||
|
File file = BinaryDictUtils.getDictFile(dictName, dictVersion, formatOptions,
|
||||||
|
getContext().getCacheDir());
|
||||||
|
timeWritingDictToFile(file, dict, formatOptions);
|
||||||
|
|
||||||
|
if (compress) {
|
||||||
|
final File rawFile = file;
|
||||||
|
file = BinaryDictUtils.getDictFile(dictName + "compress", dictVersion, formatOptions,
|
||||||
|
getContext().getCacheDir());
|
||||||
|
final Compress.Compressor compressCommand = new Compress.Compressor();
|
||||||
|
compressCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
|
||||||
|
compressCommand.run();
|
||||||
|
}
|
||||||
|
if (crypt) {
|
||||||
|
final File rawFile = file;
|
||||||
|
file = BinaryDictUtils.getDictFile(dictName + "crypt", dictVersion, formatOptions,
|
||||||
|
getContext().getCacheDir());
|
||||||
|
final Crypt.Encrypter cryptCommand = new Crypt.Encrypter();
|
||||||
|
cryptCommand.setArgs(new String[] { rawFile.getPath(), file.getPath() });
|
||||||
|
cryptCommand.run();
|
||||||
|
}
|
||||||
|
|
||||||
|
final DecoderChainSpec<DictionaryHeader> spec =
|
||||||
|
BinaryDictOffdeviceUtils.decodeDictionaryForProcess(file,
|
||||||
|
new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
|
||||||
|
assertNotNull("Can't decode a dictionary we just wrote : " + file, spec);
|
||||||
|
final DictionaryHeader header = spec.mResult;
|
||||||
|
assertEquals("raw" + (crypt ? " > encryption" : "") + (compress ? " > compression" : ""),
|
||||||
|
spec.describeChain());
|
||||||
|
assertEquals(header.mDictionaryOptions.mAttributes, options);
|
||||||
|
}
|
||||||
|
|
||||||
|
public void testHeaderReaderProcessor() throws IOException {
|
||||||
|
runTestHeaderReaderProcessorWithOneSpec(false /* compress */, false /* crypt */);
|
||||||
|
runTestHeaderReaderProcessorWithOneSpec(true /* compress */, false /* crypt */);
|
||||||
|
runTestHeaderReaderProcessorWithOneSpec(true /* compress */, true /* crypt */);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -65,7 +65,7 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
};
|
};
|
||||||
|
|
||||||
private final int mDecoderSpecIndex;
|
private final int mDecoderSpecIndex;
|
||||||
T mResult;
|
public T mResult;
|
||||||
|
|
||||||
public DecoderChainSpec() {
|
public DecoderChainSpec() {
|
||||||
mDecoderSpecIndex = 0;
|
mDecoderSpecIndex = 0;
|
||||||
|
@ -174,12 +174,13 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
}
|
}
|
||||||
final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8)
|
final int version = ((tmpBuffer[VERSION_START_OFFSET] & 0xFF) << 8)
|
||||||
+ (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF);
|
+ (tmpBuffer[VERSION_START_OFFSET + 1] & 0xFF);
|
||||||
if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201) {
|
if (version != FormatSpec.VERSION2 && version != FormatSpec.VERSION201
|
||||||
throw new UnsupportedFormatException("Only versions 2 and 201 are supported");
|
&& version != FormatSpec.VERSION202) {
|
||||||
|
throw new UnsupportedFormatException("Only versions 2, 201, 202 are supported");
|
||||||
}
|
}
|
||||||
final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) >> 24)
|
final int totalHeaderSize = ((tmpBuffer[HEADER_SIZE_OFFSET] & 0xFF) << 24)
|
||||||
+ ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) >> 16)
|
+ ((tmpBuffer[HEADER_SIZE_OFFSET + 1] & 0xFF) << 16)
|
||||||
+ ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) >> 8)
|
+ ((tmpBuffer[HEADER_SIZE_OFFSET + 2] & 0xFF) << 8)
|
||||||
+ (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF);
|
+ (tmpBuffer[HEADER_SIZE_OFFSET + 3] & 0xFF);
|
||||||
if (totalHeaderSize > MAX_HEADER_LENGTH) {
|
if (totalHeaderSize > MAX_HEADER_LENGTH) {
|
||||||
throw new UnsupportedFormatException("Header too large");
|
throw new UnsupportedFormatException("Header too large");
|
||||||
|
@ -215,11 +216,22 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
@Nonnull DecoderChainSpec spec = new DecoderChainSpec();
|
@Nonnull DecoderChainSpec spec = new DecoderChainSpec();
|
||||||
while (null != spec) {
|
while (null != spec) {
|
||||||
try {
|
try {
|
||||||
try (final InputStream input = spec.getStream(src)) {
|
final InputStream input = spec.getStream(src);
|
||||||
spec.mResult = processor.process(input);
|
spec.mResult = processor.process(input);
|
||||||
return spec;
|
try {
|
||||||
|
input.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// CipherInputStream doesn't like being closed without having read the
|
||||||
|
// entire stream, for some reason. But we don't want to because it's a waste
|
||||||
|
// of resources. We really, really don't care about this.
|
||||||
|
// However on close() CipherInputStream does throw this exception, wrapped
|
||||||
|
// in an IOException so we need to catch it.
|
||||||
|
if (!(e.getCause() instanceof javax.crypto.BadPaddingException)) {
|
||||||
|
throw e;
|
||||||
}
|
}
|
||||||
} catch (IOException | UnsupportedFormatException e) {
|
}
|
||||||
|
return spec;
|
||||||
|
} catch (IOException | UnsupportedFormatException | ArrayIndexOutOfBoundsException e) {
|
||||||
// If the format is not the right one for this file, the processor will throw one
|
// If the format is not the right one for this file, the processor will throw one
|
||||||
// of these exceptions. In our case, that means we should try the next spec,
|
// of these exceptions. In our case, that means we should try the next spec,
|
||||||
// since it may still be at another format we haven't tried yet.
|
// since it may still be at another format we haven't tried yet.
|
||||||
|
|
|
@ -20,6 +20,7 @@ public class CommandList {
|
||||||
public static void populate() {
|
public static void populate() {
|
||||||
// TODO: Move some commands to native code.
|
// TODO: Move some commands to native code.
|
||||||
Dicttool.addCommand("info", Info.class);
|
Dicttool.addCommand("info", Info.class);
|
||||||
|
Dicttool.addCommand("header", Header.class);
|
||||||
Dicttool.addCommand("diff", Diff.class);
|
Dicttool.addCommand("diff", Diff.class);
|
||||||
Dicttool.addCommand("compress", Compress.Compressor.class);
|
Dicttool.addCommand("compress", Compress.Compressor.class);
|
||||||
Dicttool.addCommand("uncompress", Compress.Uncompressor.class);
|
Dicttool.addCommand("uncompress", Compress.Uncompressor.class);
|
||||||
|
|
|
@ -0,0 +1,69 @@
|
||||||
|
/**
|
||||||
|
* Copyright (C) 2014 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
|
||||||
|
* use this file except in compliance with the License. You may obtain a copy of
|
||||||
|
* the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
|
||||||
|
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
|
||||||
|
* License for the specific language governing permissions and limitations under
|
||||||
|
* the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.android.inputmethod.latin.dicttool;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.BinaryDictionary;
|
||||||
|
import com.android.inputmethod.latin.dicttool.BinaryDictOffdeviceUtils.DecoderChainSpec;
|
||||||
|
import com.android.inputmethod.latin.makedict.DictionaryHeader;
|
||||||
|
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||||
|
|
||||||
|
import java.io.File;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.Locale;
|
||||||
|
|
||||||
|
public class Header extends Dicttool.Command {
|
||||||
|
public static final String COMMAND = "header";
|
||||||
|
|
||||||
|
public Header() {
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public String getHelp() {
|
||||||
|
return COMMAND + " <filename>: prints the header contents of a dictionary file";
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void run() throws UnsupportedFormatException {
|
||||||
|
final boolean plumbing;
|
||||||
|
if (mArgs.length > 0 && "-p".equals(mArgs[0])) {
|
||||||
|
plumbing = true;
|
||||||
|
mArgs = Arrays.copyOfRange(mArgs, 1, mArgs.length);
|
||||||
|
} else {
|
||||||
|
plumbing = false;
|
||||||
|
}
|
||||||
|
if (mArgs.length < 1) {
|
||||||
|
throw new RuntimeException("Not enough arguments for command " + COMMAND);
|
||||||
|
}
|
||||||
|
final String filename = mArgs[0];
|
||||||
|
final File dictFile = new File(filename);
|
||||||
|
final DecoderChainSpec<DictionaryHeader> spec =
|
||||||
|
BinaryDictOffdeviceUtils.decodeDictionaryForProcess(dictFile,
|
||||||
|
new BinaryDictOffdeviceUtils.HeaderReaderProcessor());
|
||||||
|
if (null == spec) {
|
||||||
|
throw new UnsupportedFormatException(filename
|
||||||
|
+ " doesn't seem to be a valid version 2 dictionary file");
|
||||||
|
}
|
||||||
|
|
||||||
|
final DictionaryHeader header = spec.mResult;
|
||||||
|
System.out.println("Dictionary : " + dictFile.getAbsolutePath());
|
||||||
|
System.out.println("Size : " + dictFile.length() + " bytes");
|
||||||
|
System.out.println("Format : Binary dictionary format");
|
||||||
|
System.out.println("Packaging : " + spec.describeChain());
|
||||||
|
System.out.println("Header attributes :");
|
||||||
|
System.out.print(header.mDictionaryOptions.toString(2 /* indentCount */, plumbing));
|
||||||
|
}
|
||||||
|
}
|
Loading…
Reference in New Issue