Add FormatOptions.

Change-Id: Ibad05a5f9143de1156b2c897593ec89b0a0b07e7
This commit is contained in:
Yuichiro Hanada 2012-09-05 12:37:56 +09:00
parent 8c220a0aa2
commit 83dfe0fd8c
6 changed files with 69 additions and 28 deletions

View file

@ -89,6 +89,10 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
/** Controls access to the local binary dictionary for this instance. */
private final DictionaryController mLocalDictionaryController = new DictionaryController();
private static final int BINARY_DICT_VERSION = 1;
private static final BinaryDictInputOutput.FormatOptions FORMAT_OPTIONS =
new BinaryDictInputOutput.FormatOptions(BINARY_DICT_VERSION);
/**
* Abstract method for loading the unigrams and bigrams of a given dictionary in a background
* thread.
@ -310,7 +314,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
FileOutputStream out = null;
try {
out = new FileOutputStream(tempFile);
BinaryDictInputOutput.writeDictionaryBinary(out, mFusionDictionary, 1);
BinaryDictInputOutput.writeDictionaryBinary(out, mFusionDictionary, FORMAT_OPTIONS);
out.flush();
out.close();
tempFile.renameTo(file);

View file

@ -19,6 +19,7 @@ package com.android.inputmethod.latin;
import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FormatOptions;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
@ -97,12 +98,12 @@ public class UserHistoryDictIOUtils {
*/
public static void writeDictionaryBinary(final OutputStream destination,
final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
final int version) {
final FormatOptions formatOptions) {
final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
try {
BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, version);
BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, formatOptions);
} catch (IOException e) {
Log.e(TAG, "IO exception while writing file: " + e);
} catch (UnsupportedFormatException e) {

View file

@ -241,6 +241,31 @@ public class BinaryDictInputOutput {
}
}
/**
* Options about the file format.
*
* Currently this holds only the version of the file format.
*/
public static class FormatOptions {
// The version of the file format.
public final int mVersion;
/**
* Creates format options for the given version.
*
* @param version the version of the file format.
*/
public FormatOptions(final int version) {
mVersion = version;
}
}
/**
* Class representing a file header.
*
* Groups the size of the header with the dictionary options and the format options.
*/
private static final class FileHeader {
// The size of the header.
public final int mHeaderSize;
// The options of the dictionary.
public final DictionaryOptions mDictionaryOptions;
// The options of the file format.
public final FormatOptions mFormatOptions;
/**
* Creates a file header from its three parts; each argument is stored as is.
*
* @param headerSize the size of the header.
* @param dictionaryOptions the options of the dictionary.
* @param formatOptions the options of the file format.
*/
public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions,
final FormatOptions formatOptions) {
mHeaderSize = headerSize;
mDictionaryOptions = dictionaryOptions;
mFormatOptions = formatOptions;
}
}
/**
* A class grouping utility functions for our specific character encoding.
*/
@ -1051,10 +1076,10 @@ public class BinaryDictInputOutput {
*
* @param destination the stream to write the binary data to.
* @param dict the dictionary to write.
* @param version the version of the format to write, currently either 1 or 2.
* @param formatOptions the file format options, including the format version to write.
*/
public static void writeDictionaryBinary(final OutputStream destination,
final FusionDictionary dict, final int version)
final FusionDictionary dict, final FormatOptions formatOptions)
throws IOException, UnsupportedFormatException {
// Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
@ -1063,6 +1088,7 @@ public class BinaryDictInputOutput {
// does not have a size limit, each node must still be within 16MB of all its children and
// parents. As long as this is ensured, the dictionary file may grow to any size.
final int version = formatOptions.mVersion;
if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
throw new UnsupportedFormatException("Requested file format version " + version
+ ", but this implementation only supports versions "
@ -1471,12 +1497,11 @@ public class BinaryDictInputOutput {
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
UnsupportedFormatException {
// Read header
final int version = checkFormatVersion(buffer);
final int optionsFlags = buffer.readUnsignedShort();
final HashMap<String, String> options = new HashMap<String, String>();
final int headerSize = readHeader(buffer, options, version);
FormatOptions formatOptions = null;
DictionaryOptions dictionaryOptions = null;
final FileHeader header = readHeader(buffer);
readUnigramsAndBigramsBinaryInner(buffer, headerSize, words, frequencies, bigrams);
readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams);
}
/**
@ -1510,25 +1535,35 @@ public class BinaryDictInputOutput {
/**
* Reads a header from a buffer.
* @param buffer the buffer to read.
* @throws IOException
* @throws UnsupportedFormatException
*/
private static int readHeader(final FusionDictionaryBufferInterface buffer,
final HashMap<String, String> options, final int version)
private static FileHeader readHeader(final FusionDictionaryBufferInterface buffer)
throws IOException, UnsupportedFormatException {
final int version = checkFormatVersion(buffer);
final int optionsFlags = buffer.readUnsignedShort();
final HashMap<String, String> attributes = new HashMap<String, String>();
final int headerSize;
if (version < FIRST_VERSION_WITH_HEADER_SIZE) {
headerSize = buffer.position();
} else {
headerSize = buffer.readInt();
populateOptions(buffer, headerSize, options);
populateOptions(buffer, headerSize, attributes);
buffer.position(headerSize);
}
if (headerSize < 0) {
throw new UnsupportedFormatException("header size can't be negative.");
}
return headerSize;
final FileHeader header = new FileHeader(headerSize,
new FusionDictionary.DictionaryOptions(attributes,
0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG),
0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)),
new FormatOptions(version));
return header;
}
/**
@ -1569,21 +1604,14 @@ public class BinaryDictInputOutput {
wordCache.clear();
// Read header
final int version = checkFormatVersion(buffer);
final int optionsFlags = buffer.readUnsignedShort();
final HashMap<String, String> options = new HashMap<String, String>();
final int headerSize = readHeader(buffer, options, version);
final FileHeader header = readHeader(buffer);
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
final Node root = readNode(
buffer, headerSize, reverseNodeMapping, reverseGroupMapping);
final Node root = readNode(buffer, header.mHeaderSize, reverseNodeMapping,
reverseGroupMapping);
FusionDictionary newDict = new FusionDictionary(root,
new FusionDictionary.DictionaryOptions(options,
0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG),
0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)));
FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
if (null != dict) {
for (final Word w : dict) {
if (w.mIsBlacklistEntry) {

View file

@ -52,6 +52,9 @@ public class BinaryDictIOTests extends AndroidTestCase {
private static final int BIGRAM_FREQ = 50;
private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
private static final BinaryDictInputOutput.FormatOptions VERSION2 =
new BinaryDictInputOutput.FormatOptions(2);
private static final String[] CHARACTERS =
{
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
@ -112,7 +115,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
final FileOutputStream out = new FileOutputStream(file);
now = System.currentTimeMillis();
BinaryDictInputOutput.writeDictionaryBinary(out, dict, 2);
BinaryDictInputOutput.writeDictionaryBinary(out, dict, VERSION2);
diff = System.currentTimeMillis() - now;
out.flush();

View file

@ -18,6 +18,7 @@ package com.android.inputmethod.latin;
import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface;
import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
@ -44,6 +45,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
private static final int UNIGRAM_FREQUENCY = 50;
private static final int BIGRAM_FREQUENCY = 100;
private static final ArrayList<String> NOT_HAVE_BIGRAM = new ArrayList<String>();
private static final BinaryDictInputOutput.FormatOptions FORMAT_OPTIONS =
new BinaryDictInputOutput.FormatOptions(2);
/**
* Return same frequency for all words and bigrams
@ -132,7 +135,7 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
final UserHistoryDictionaryBigramList bigramList) {
try {
final FileOutputStream out = new FileOutputStream(file);
UserHistoryDictIOUtils.writeDictionaryBinary(out, this, bigramList, 2);
UserHistoryDictIOUtils.writeDictionaryBinary(out, this, bigramList, FORMAT_OPTIONS);
out.flush();
out.close();
} catch (IOException e) {

View file

@ -317,8 +317,10 @@ public class DictionaryMaker {
final FusionDictionary dict, final int version)
throws FileNotFoundException, IOException, UnsupportedFormatException {
final File outputFile = new File(outputFilename);
final BinaryDictInputOutput.FormatOptions formatOptions =
new BinaryDictInputOutput.FormatOptions(version);
BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict,
version);
formatOptions);
}
/**