Add FormatOptions.

Change-Id: Ibad05a5f9143de1156b2c897593ec89b0a0b07e7
main
Yuichiro Hanada 2012-09-05 12:37:56 +09:00
parent 8c220a0aa2
commit 83dfe0fd8c
6 changed files with 69 additions and 28 deletions

View File

@ -89,6 +89,10 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
/** Controls access to the local binary dictionary for this instance. */ /** Controls access to the local binary dictionary for this instance. */
private final DictionaryController mLocalDictionaryController = new DictionaryController(); private final DictionaryController mLocalDictionaryController = new DictionaryController();
private static final int BINARY_DICT_VERSION = 1;
private static final BinaryDictInputOutput.FormatOptions FORMAT_OPTIONS =
new BinaryDictInputOutput.FormatOptions(BINARY_DICT_VERSION);
/** /**
* Abstract method for loading the unigrams and bigrams of a given dictionary in a background * Abstract method for loading the unigrams and bigrams of a given dictionary in a background
* thread. * thread.
@ -310,7 +314,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
FileOutputStream out = null; FileOutputStream out = null;
try { try {
out = new FileOutputStream(tempFile); out = new FileOutputStream(tempFile);
BinaryDictInputOutput.writeDictionaryBinary(out, mFusionDictionary, 1); BinaryDictInputOutput.writeDictionaryBinary(out, mFusionDictionary, FORMAT_OPTIONS);
out.flush(); out.flush();
out.close(); out.close();
tempFile.renameTo(file); tempFile.renameTo(file);

View File

@ -19,6 +19,7 @@ package com.android.inputmethod.latin;
import android.util.Log; import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FormatOptions;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
@ -97,12 +98,12 @@ public class UserHistoryDictIOUtils {
*/ */
public static void writeDictionaryBinary(final OutputStream destination, public static void writeDictionaryBinary(final OutputStream destination,
final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams, final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
final int version) { final FormatOptions formatOptions) {
final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams); final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
try { try {
BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, version); BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, formatOptions);
} catch (IOException e) { } catch (IOException e) {
Log.e(TAG, "IO exception while writing file: " + e); Log.e(TAG, "IO exception while writing file: " + e);
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {

View File

@ -241,6 +241,31 @@ public class BinaryDictInputOutput {
} }
} }
/**
 * Options describing the file format of a binary dictionary.
 *
 * The only option carried is the version number of the format.
 */
public static class FormatOptions {
    /** Version number of the binary dictionary file format. */
    public final int mVersion;

    /**
     * Creates format options for the given format version.
     *
     * @param version the version number of the file format.
     */
    public FormatOptions(final int version) {
        this.mVersion = version;
    }
}
/**
 * Value holder representing a dictionary file header: the header size together with the
 * dictionary options and the format options. All fields are assigned once, in the
 * constructor.
 */
private static final class FileHeader {
    /** Size of the header, as supplied to the constructor. */
    public final int mHeaderSize;
    /** Dictionary options associated with this header. */
    public final DictionaryOptions mDictionaryOptions;
    /** File format options (e.g. the format version) associated with this header. */
    public final FormatOptions mFormatOptions;

    /**
     * @param headerSize the size of the header.
     * @param dictionaryOptions the dictionary options to store.
     * @param formatOptions the file format options to store.
     */
    public FileHeader(
            final int headerSize,
            final DictionaryOptions dictionaryOptions,
            final FormatOptions formatOptions) {
        this.mHeaderSize = headerSize;
        this.mDictionaryOptions = dictionaryOptions;
        this.mFormatOptions = formatOptions;
    }
}
/** /**
* A class grouping utility function for our specific character encoding. * A class grouping utility function for our specific character encoding.
*/ */
@ -1051,10 +1076,10 @@ public class BinaryDictInputOutput {
* *
* @param destination the stream to write the binary data to. * @param destination the stream to write the binary data to.
* @param dict the dictionary to write. * @param dict the dictionary to write.
* @param version the version of the format to write, currently either 1 or 2. * @param formatOptions the options of file format.
*/ */
public static void writeDictionaryBinary(final OutputStream destination, public static void writeDictionaryBinary(final OutputStream destination,
final FusionDictionary dict, final int version) final FusionDictionary dict, final FormatOptions formatOptions)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
// Addresses are limited to 3 bytes, but since addresses can be relative to each node, the // Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
@ -1063,6 +1088,7 @@ public class BinaryDictInputOutput {
// does not have a size limit, each node must still be within 16MB of all its children and // does not have a size limit, each node must still be within 16MB of all its children and
// parents. As long as this is ensured, the dictionary file may grow to any size. // parents. As long as this is ensured, the dictionary file may grow to any size.
final int version = formatOptions.mVersion;
if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) { if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
throw new UnsupportedFormatException("Requested file format version " + version throw new UnsupportedFormatException("Requested file format version " + version
+ ", but this implementation only supports versions " + ", but this implementation only supports versions "
@ -1471,12 +1497,11 @@ public class BinaryDictInputOutput {
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
UnsupportedFormatException { UnsupportedFormatException {
// Read header // Read header
final int version = checkFormatVersion(buffer); FormatOptions formatOptions = null;
final int optionsFlags = buffer.readUnsignedShort(); DictionaryOptions dictionaryOptions = null;
final HashMap<String, String> options = new HashMap<String, String>(); final FileHeader header = readHeader(buffer);
final int headerSize = readHeader(buffer, options, version);
readUnigramsAndBigramsBinaryInner(buffer, headerSize, words, frequencies, bigrams); readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams);
} }
/** /**
@ -1510,25 +1535,35 @@ public class BinaryDictInputOutput {
/** /**
* Reads a header from a buffer. * Reads a header from a buffer.
* @param buffer the buffer to read.
* @throws IOException * @throws IOException
* @throws UnsupportedFormatException * @throws UnsupportedFormatException
*/ */
private static int readHeader(final FusionDictionaryBufferInterface buffer, private static FileHeader readHeader(final FusionDictionaryBufferInterface buffer)
final HashMap<String, String> options, final int version)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
final int version = checkFormatVersion(buffer);
final int optionsFlags = buffer.readUnsignedShort();
final HashMap<String, String> attributes = new HashMap<String, String>();
final int headerSize; final int headerSize;
if (version < FIRST_VERSION_WITH_HEADER_SIZE) { if (version < FIRST_VERSION_WITH_HEADER_SIZE) {
headerSize = buffer.position(); headerSize = buffer.position();
} else { } else {
headerSize = buffer.readInt(); headerSize = buffer.readInt();
populateOptions(buffer, headerSize, options); populateOptions(buffer, headerSize, attributes);
buffer.position(headerSize); buffer.position(headerSize);
} }
if (headerSize < 0) { if (headerSize < 0) {
throw new UnsupportedFormatException("header size can't be negative."); throw new UnsupportedFormatException("header size can't be negative.");
} }
return headerSize;
final FileHeader header = new FileHeader(headerSize,
new FusionDictionary.DictionaryOptions(attributes,
0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG),
0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)),
new FormatOptions(version));
return header;
} }
/** /**
@ -1569,21 +1604,14 @@ public class BinaryDictInputOutput {
wordCache.clear(); wordCache.clear();
// Read header // Read header
final int version = checkFormatVersion(buffer); final FileHeader header = readHeader(buffer);
final int optionsFlags = buffer.readUnsignedShort();
final HashMap<String, String> options = new HashMap<String, String>();
final int headerSize = readHeader(buffer, options, version);
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>(); Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>(); Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
final Node root = readNode( final Node root = readNode(buffer, header.mHeaderSize, reverseNodeMapping,
buffer, headerSize, reverseNodeMapping, reverseGroupMapping); reverseGroupMapping);
FusionDictionary newDict = new FusionDictionary(root, FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
new FusionDictionary.DictionaryOptions(options,
0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG),
0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)));
if (null != dict) { if (null != dict) {
for (final Word w : dict) { for (final Word w : dict) {
if (w.mIsBlacklistEntry) { if (w.mIsBlacklistEntry) {

View File

@ -52,6 +52,9 @@ public class BinaryDictIOTests extends AndroidTestCase {
private static final int BIGRAM_FREQ = 50; private static final int BIGRAM_FREQ = 50;
private static final int TOLERANCE_OF_BIGRAM_FREQ = 5; private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
private static final BinaryDictInputOutput.FormatOptions VERSION2 =
new BinaryDictInputOutput.FormatOptions(2);
private static final String[] CHARACTERS = private static final String[] CHARACTERS =
{ {
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
@ -112,7 +115,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
final FileOutputStream out = new FileOutputStream(file); final FileOutputStream out = new FileOutputStream(file);
now = System.currentTimeMillis(); now = System.currentTimeMillis();
BinaryDictInputOutput.writeDictionaryBinary(out, dict, 2); BinaryDictInputOutput.writeDictionaryBinary(out, dict, VERSION2);
diff = System.currentTimeMillis() - now; diff = System.currentTimeMillis() - now;
out.flush(); out.flush();

View File

@ -18,6 +18,7 @@ package com.android.inputmethod.latin;
import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface; import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface;
import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener; import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener;
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
@ -44,6 +45,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
private static final int UNIGRAM_FREQUENCY = 50; private static final int UNIGRAM_FREQUENCY = 50;
private static final int BIGRAM_FREQUENCY = 100; private static final int BIGRAM_FREQUENCY = 100;
private static final ArrayList<String> NOT_HAVE_BIGRAM = new ArrayList<String>(); private static final ArrayList<String> NOT_HAVE_BIGRAM = new ArrayList<String>();
private static final BinaryDictInputOutput.FormatOptions FORMAT_OPTIONS =
new BinaryDictInputOutput.FormatOptions(2);
/** /**
* Return same frequency for all words and bigrams * Return same frequency for all words and bigrams
@ -132,7 +135,7 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
final UserHistoryDictionaryBigramList bigramList) { final UserHistoryDictionaryBigramList bigramList) {
try { try {
final FileOutputStream out = new FileOutputStream(file); final FileOutputStream out = new FileOutputStream(file);
UserHistoryDictIOUtils.writeDictionaryBinary(out, this, bigramList, 2); UserHistoryDictIOUtils.writeDictionaryBinary(out, this, bigramList, FORMAT_OPTIONS);
out.flush(); out.flush();
out.close(); out.close();
} catch (IOException e) { } catch (IOException e) {

View File

@ -317,8 +317,10 @@ public class DictionaryMaker {
final FusionDictionary dict, final int version) final FusionDictionary dict, final int version)
throws FileNotFoundException, IOException, UnsupportedFormatException { throws FileNotFoundException, IOException, UnsupportedFormatException {
final File outputFile = new File(outputFilename); final File outputFile = new File(outputFilename);
final BinaryDictInputOutput.FormatOptions formatOptions =
new BinaryDictInputOutput.FormatOptions(version);
BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict, BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict,
version); formatOptions);
} }
/** /**