Add FormatOptions.
Change-Id: Ibad05a5f9143de1156b2c897593ec89b0a0b07e7
This commit is contained in:
parent
8c220a0aa2
commit
83dfe0fd8c
6 changed files with 69 additions and 28 deletions
|
@ -89,6 +89,10 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
/** Controls access to the local binary dictionary for this instance. */
|
||||
private final DictionaryController mLocalDictionaryController = new DictionaryController();
|
||||
|
||||
private static final int BINARY_DICT_VERSION = 1;
|
||||
private static final BinaryDictInputOutput.FormatOptions FORMAT_OPTIONS =
|
||||
new BinaryDictInputOutput.FormatOptions(BINARY_DICT_VERSION);
|
||||
|
||||
/**
|
||||
* Abstract method for loading the unigrams and bigrams of a given dictionary in a background
|
||||
* thread.
|
||||
|
@ -310,7 +314,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
|||
FileOutputStream out = null;
|
||||
try {
|
||||
out = new FileOutputStream(tempFile);
|
||||
BinaryDictInputOutput.writeDictionaryBinary(out, mFusionDictionary, 1);
|
||||
BinaryDictInputOutput.writeDictionaryBinary(out, mFusionDictionary, FORMAT_OPTIONS);
|
||||
out.flush();
|
||||
out.close();
|
||||
tempFile.renameTo(file);
|
||||
|
|
|
@ -19,6 +19,7 @@ package com.android.inputmethod.latin;
|
|||
import android.util.Log;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||
|
@ -97,12 +98,12 @@ public class UserHistoryDictIOUtils {
|
|||
*/
|
||||
public static void writeDictionaryBinary(final OutputStream destination,
|
||||
final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
|
||||
final int version) {
|
||||
final FormatOptions formatOptions) {
|
||||
|
||||
final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
|
||||
|
||||
try {
|
||||
BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, version);
|
||||
BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, formatOptions);
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IO exception while writing file: " + e);
|
||||
} catch (UnsupportedFormatException e) {
|
||||
|
|
|
@ -241,6 +241,31 @@ public class BinaryDictInputOutput {
|
|||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Options about file format.
|
||||
*/
|
||||
public static class FormatOptions {
|
||||
public final int mVersion;
|
||||
public FormatOptions(final int version) {
|
||||
mVersion = version;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Class representing file header.
|
||||
*/
|
||||
private static final class FileHeader {
|
||||
public final int mHeaderSize;
|
||||
public final DictionaryOptions mDictionaryOptions;
|
||||
public final FormatOptions mFormatOptions;
|
||||
public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions,
|
||||
final FormatOptions formatOptions) {
|
||||
mHeaderSize = headerSize;
|
||||
mDictionaryOptions = dictionaryOptions;
|
||||
mFormatOptions = formatOptions;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class grouping utility function for our specific character encoding.
|
||||
*/
|
||||
|
@ -1051,10 +1076,10 @@ public class BinaryDictInputOutput {
|
|||
*
|
||||
* @param destination the stream to write the binary data to.
|
||||
* @param dict the dictionary to write.
|
||||
* @param version the version of the format to write, currently either 1 or 2.
|
||||
* @param formatOptions the options of file format.
|
||||
*/
|
||||
public static void writeDictionaryBinary(final OutputStream destination,
|
||||
final FusionDictionary dict, final int version)
|
||||
final FusionDictionary dict, final FormatOptions formatOptions)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
|
||||
// Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
|
||||
|
@ -1063,6 +1088,7 @@ public class BinaryDictInputOutput {
|
|||
// does not have a size limit, each node must still be within 16MB of all its children and
|
||||
// parents. As long as this is ensured, the dictionary file may grow to any size.
|
||||
|
||||
final int version = formatOptions.mVersion;
|
||||
if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
|
||||
throw new UnsupportedFormatException("Requested file format version " + version
|
||||
+ ", but this implementation only supports versions "
|
||||
|
@ -1471,12 +1497,11 @@ public class BinaryDictInputOutput {
|
|||
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
|
||||
UnsupportedFormatException {
|
||||
// Read header
|
||||
final int version = checkFormatVersion(buffer);
|
||||
final int optionsFlags = buffer.readUnsignedShort();
|
||||
final HashMap<String, String> options = new HashMap<String, String>();
|
||||
final int headerSize = readHeader(buffer, options, version);
|
||||
FormatOptions formatOptions = null;
|
||||
DictionaryOptions dictionaryOptions = null;
|
||||
final FileHeader header = readHeader(buffer);
|
||||
|
||||
readUnigramsAndBigramsBinaryInner(buffer, headerSize, words, frequencies, bigrams);
|
||||
readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams);
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1510,25 +1535,35 @@ public class BinaryDictInputOutput {
|
|||
|
||||
/**
|
||||
* Reads a header from a buffer.
|
||||
* @param buffer the buffer to read.
|
||||
* @throws IOException
|
||||
* @throws UnsupportedFormatException
|
||||
*/
|
||||
private static int readHeader(final FusionDictionaryBufferInterface buffer,
|
||||
final HashMap<String, String> options, final int version)
|
||||
private static FileHeader readHeader(final FusionDictionaryBufferInterface buffer)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final int version = checkFormatVersion(buffer);
|
||||
final int optionsFlags = buffer.readUnsignedShort();
|
||||
|
||||
final HashMap<String, String> attributes = new HashMap<String, String>();
|
||||
final int headerSize;
|
||||
if (version < FIRST_VERSION_WITH_HEADER_SIZE) {
|
||||
headerSize = buffer.position();
|
||||
} else {
|
||||
headerSize = buffer.readInt();
|
||||
populateOptions(buffer, headerSize, options);
|
||||
populateOptions(buffer, headerSize, attributes);
|
||||
buffer.position(headerSize);
|
||||
}
|
||||
|
||||
if (headerSize < 0) {
|
||||
throw new UnsupportedFormatException("header size can't be negative.");
|
||||
}
|
||||
return headerSize;
|
||||
|
||||
final FileHeader header = new FileHeader(headerSize,
|
||||
new FusionDictionary.DictionaryOptions(attributes,
|
||||
0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG),
|
||||
0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)),
|
||||
new FormatOptions(version));
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
|
@ -1569,21 +1604,14 @@ public class BinaryDictInputOutput {
|
|||
wordCache.clear();
|
||||
|
||||
// Read header
|
||||
final int version = checkFormatVersion(buffer);
|
||||
final int optionsFlags = buffer.readUnsignedShort();
|
||||
|
||||
final HashMap<String, String> options = new HashMap<String, String>();
|
||||
final int headerSize = readHeader(buffer, options, version);
|
||||
final FileHeader header = readHeader(buffer);
|
||||
|
||||
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
|
||||
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
||||
final Node root = readNode(
|
||||
buffer, headerSize, reverseNodeMapping, reverseGroupMapping);
|
||||
final Node root = readNode(buffer, header.mHeaderSize, reverseNodeMapping,
|
||||
reverseGroupMapping);
|
||||
|
||||
FusionDictionary newDict = new FusionDictionary(root,
|
||||
new FusionDictionary.DictionaryOptions(options,
|
||||
0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG),
|
||||
0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)));
|
||||
FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
|
||||
if (null != dict) {
|
||||
for (final Word w : dict) {
|
||||
if (w.mIsBlacklistEntry) {
|
||||
|
|
|
@ -52,6 +52,9 @@ public class BinaryDictIOTests extends AndroidTestCase {
|
|||
private static final int BIGRAM_FREQ = 50;
|
||||
private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
|
||||
|
||||
private static final BinaryDictInputOutput.FormatOptions VERSION2 =
|
||||
new BinaryDictInputOutput.FormatOptions(2);
|
||||
|
||||
private static final String[] CHARACTERS =
|
||||
{
|
||||
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
|
||||
|
@ -112,7 +115,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
|
|||
final FileOutputStream out = new FileOutputStream(file);
|
||||
|
||||
now = System.currentTimeMillis();
|
||||
BinaryDictInputOutput.writeDictionaryBinary(out, dict, 2);
|
||||
BinaryDictInputOutput.writeDictionaryBinary(out, dict, VERSION2);
|
||||
diff = System.currentTimeMillis() - now;
|
||||
|
||||
out.flush();
|
||||
|
|
|
@ -18,6 +18,7 @@ package com.android.inputmethod.latin;
|
|||
|
||||
import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface;
|
||||
import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
|
||||
|
@ -44,6 +45,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
|
|||
private static final int UNIGRAM_FREQUENCY = 50;
|
||||
private static final int BIGRAM_FREQUENCY = 100;
|
||||
private static final ArrayList<String> NOT_HAVE_BIGRAM = new ArrayList<String>();
|
||||
private static final BinaryDictInputOutput.FormatOptions FORMAT_OPTIONS =
|
||||
new BinaryDictInputOutput.FormatOptions(2);
|
||||
|
||||
/**
|
||||
* Return same frequency for all words and bigrams
|
||||
|
@ -132,7 +135,7 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
|
|||
final UserHistoryDictionaryBigramList bigramList) {
|
||||
try {
|
||||
final FileOutputStream out = new FileOutputStream(file);
|
||||
UserHistoryDictIOUtils.writeDictionaryBinary(out, this, bigramList, 2);
|
||||
UserHistoryDictIOUtils.writeDictionaryBinary(out, this, bigramList, FORMAT_OPTIONS);
|
||||
out.flush();
|
||||
out.close();
|
||||
} catch (IOException e) {
|
||||
|
|
|
@ -317,8 +317,10 @@ public class DictionaryMaker {
|
|||
final FusionDictionary dict, final int version)
|
||||
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
||||
final File outputFile = new File(outputFilename);
|
||||
final BinaryDictInputOutput.FormatOptions formatOptions =
|
||||
new BinaryDictInputOutput.FormatOptions(version);
|
||||
BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict,
|
||||
version);
|
||||
formatOptions);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
Loading…
Reference in a new issue