am 91fd5bb5: am 83dfe0fd: Add FormatOptions.
* commit '91fd5bb568b7aaf8487b3022c5f1f8af846451a1': Add FormatOptions.main
commit
0977610bc9
|
@ -89,6 +89,10 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||||
/** Controls access to the local binary dictionary for this instance. */
|
/** Controls access to the local binary dictionary for this instance. */
|
||||||
private final DictionaryController mLocalDictionaryController = new DictionaryController();
|
private final DictionaryController mLocalDictionaryController = new DictionaryController();
|
||||||
|
|
||||||
|
private static final int BINARY_DICT_VERSION = 1;
|
||||||
|
private static final BinaryDictInputOutput.FormatOptions FORMAT_OPTIONS =
|
||||||
|
new BinaryDictInputOutput.FormatOptions(BINARY_DICT_VERSION);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Abstract method for loading the unigrams and bigrams of a given dictionary in a background
|
* Abstract method for loading the unigrams and bigrams of a given dictionary in a background
|
||||||
* thread.
|
* thread.
|
||||||
|
@ -310,7 +314,7 @@ abstract public class ExpandableBinaryDictionary extends Dictionary {
|
||||||
FileOutputStream out = null;
|
FileOutputStream out = null;
|
||||||
try {
|
try {
|
||||||
out = new FileOutputStream(tempFile);
|
out = new FileOutputStream(tempFile);
|
||||||
BinaryDictInputOutput.writeDictionaryBinary(out, mFusionDictionary, 1);
|
BinaryDictInputOutput.writeDictionaryBinary(out, mFusionDictionary, FORMAT_OPTIONS);
|
||||||
out.flush();
|
out.flush();
|
||||||
out.close();
|
out.close();
|
||||||
tempFile.renameTo(file);
|
tempFile.renameTo(file);
|
||||||
|
|
|
@ -19,6 +19,7 @@ package com.android.inputmethod.latin;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
|
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
|
||||||
|
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
|
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
|
||||||
|
@ -97,12 +98,12 @@ public class UserHistoryDictIOUtils {
|
||||||
*/
|
*/
|
||||||
public static void writeDictionaryBinary(final OutputStream destination,
|
public static void writeDictionaryBinary(final OutputStream destination,
|
||||||
final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
|
final BigramDictionaryInterface dict, final UserHistoryDictionaryBigramList bigrams,
|
||||||
final int version) {
|
final FormatOptions formatOptions) {
|
||||||
|
|
||||||
final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
|
final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, version);
|
BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, formatOptions);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
Log.e(TAG, "IO exception while writing file: " + e);
|
Log.e(TAG, "IO exception while writing file: " + e);
|
||||||
} catch (UnsupportedFormatException e) {
|
} catch (UnsupportedFormatException e) {
|
||||||
|
|
|
@ -241,6 +241,31 @@ public class BinaryDictInputOutput {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Options about file format.
|
||||||
|
*/
|
||||||
|
public static class FormatOptions {
|
||||||
|
public final int mVersion;
|
||||||
|
public FormatOptions(final int version) {
|
||||||
|
mVersion = version;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Class representing file header.
|
||||||
|
*/
|
||||||
|
private static final class FileHeader {
|
||||||
|
public final int mHeaderSize;
|
||||||
|
public final DictionaryOptions mDictionaryOptions;
|
||||||
|
public final FormatOptions mFormatOptions;
|
||||||
|
public FileHeader(final int headerSize, final DictionaryOptions dictionaryOptions,
|
||||||
|
final FormatOptions formatOptions) {
|
||||||
|
mHeaderSize = headerSize;
|
||||||
|
mDictionaryOptions = dictionaryOptions;
|
||||||
|
mFormatOptions = formatOptions;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* A class grouping utility function for our specific character encoding.
|
* A class grouping utility function for our specific character encoding.
|
||||||
*/
|
*/
|
||||||
|
@ -1051,10 +1076,10 @@ public class BinaryDictInputOutput {
|
||||||
*
|
*
|
||||||
* @param destination the stream to write the binary data to.
|
* @param destination the stream to write the binary data to.
|
||||||
* @param dict the dictionary to write.
|
* @param dict the dictionary to write.
|
||||||
* @param version the version of the format to write, currently either 1 or 2.
|
* @param formatOptions the options of file format.
|
||||||
*/
|
*/
|
||||||
public static void writeDictionaryBinary(final OutputStream destination,
|
public static void writeDictionaryBinary(final OutputStream destination,
|
||||||
final FusionDictionary dict, final int version)
|
final FusionDictionary dict, final FormatOptions formatOptions)
|
||||||
throws IOException, UnsupportedFormatException {
|
throws IOException, UnsupportedFormatException {
|
||||||
|
|
||||||
// Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
|
// Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
|
||||||
|
@ -1063,6 +1088,7 @@ public class BinaryDictInputOutput {
|
||||||
// does not have a size limit, each node must still be within 16MB of all its children and
|
// does not have a size limit, each node must still be within 16MB of all its children and
|
||||||
// parents. As long as this is ensured, the dictionary file may grow to any size.
|
// parents. As long as this is ensured, the dictionary file may grow to any size.
|
||||||
|
|
||||||
|
final int version = formatOptions.mVersion;
|
||||||
if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
|
if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
|
||||||
throw new UnsupportedFormatException("Requested file format version " + version
|
throw new UnsupportedFormatException("Requested file format version " + version
|
||||||
+ ", but this implementation only supports versions "
|
+ ", but this implementation only supports versions "
|
||||||
|
@ -1471,12 +1497,11 @@ public class BinaryDictInputOutput {
|
||||||
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
|
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
|
||||||
UnsupportedFormatException {
|
UnsupportedFormatException {
|
||||||
// Read header
|
// Read header
|
||||||
final int version = checkFormatVersion(buffer);
|
FormatOptions formatOptions = null;
|
||||||
final int optionsFlags = buffer.readUnsignedShort();
|
DictionaryOptions dictionaryOptions = null;
|
||||||
final HashMap<String, String> options = new HashMap<String, String>();
|
final FileHeader header = readHeader(buffer);
|
||||||
final int headerSize = readHeader(buffer, options, version);
|
|
||||||
|
|
||||||
readUnigramsAndBigramsBinaryInner(buffer, headerSize, words, frequencies, bigrams);
|
readUnigramsAndBigramsBinaryInner(buffer, header.mHeaderSize, words, frequencies, bigrams);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1510,25 +1535,35 @@ public class BinaryDictInputOutput {
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Reads a header from a buffer.
|
* Reads a header from a buffer.
|
||||||
|
* @param buffer the buffer to read.
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* @throws UnsupportedFormatException
|
* @throws UnsupportedFormatException
|
||||||
*/
|
*/
|
||||||
private static int readHeader(final FusionDictionaryBufferInterface buffer,
|
private static FileHeader readHeader(final FusionDictionaryBufferInterface buffer)
|
||||||
final HashMap<String, String> options, final int version)
|
|
||||||
throws IOException, UnsupportedFormatException {
|
throws IOException, UnsupportedFormatException {
|
||||||
|
final int version = checkFormatVersion(buffer);
|
||||||
|
final int optionsFlags = buffer.readUnsignedShort();
|
||||||
|
|
||||||
|
final HashMap<String, String> attributes = new HashMap<String, String>();
|
||||||
final int headerSize;
|
final int headerSize;
|
||||||
if (version < FIRST_VERSION_WITH_HEADER_SIZE) {
|
if (version < FIRST_VERSION_WITH_HEADER_SIZE) {
|
||||||
headerSize = buffer.position();
|
headerSize = buffer.position();
|
||||||
} else {
|
} else {
|
||||||
headerSize = buffer.readInt();
|
headerSize = buffer.readInt();
|
||||||
populateOptions(buffer, headerSize, options);
|
populateOptions(buffer, headerSize, attributes);
|
||||||
buffer.position(headerSize);
|
buffer.position(headerSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (headerSize < 0) {
|
if (headerSize < 0) {
|
||||||
throw new UnsupportedFormatException("header size can't be negative.");
|
throw new UnsupportedFormatException("header size can't be negative.");
|
||||||
}
|
}
|
||||||
return headerSize;
|
|
||||||
|
final FileHeader header = new FileHeader(headerSize,
|
||||||
|
new FusionDictionary.DictionaryOptions(attributes,
|
||||||
|
0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG),
|
||||||
|
0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)),
|
||||||
|
new FormatOptions(version));
|
||||||
|
return header;
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@ -1569,21 +1604,14 @@ public class BinaryDictInputOutput {
|
||||||
wordCache.clear();
|
wordCache.clear();
|
||||||
|
|
||||||
// Read header
|
// Read header
|
||||||
final int version = checkFormatVersion(buffer);
|
final FileHeader header = readHeader(buffer);
|
||||||
final int optionsFlags = buffer.readUnsignedShort();
|
|
||||||
|
|
||||||
final HashMap<String, String> options = new HashMap<String, String>();
|
|
||||||
final int headerSize = readHeader(buffer, options, version);
|
|
||||||
|
|
||||||
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
|
Map<Integer, Node> reverseNodeMapping = new TreeMap<Integer, Node>();
|
||||||
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
||||||
final Node root = readNode(
|
final Node root = readNode(buffer, header.mHeaderSize, reverseNodeMapping,
|
||||||
buffer, headerSize, reverseNodeMapping, reverseGroupMapping);
|
reverseGroupMapping);
|
||||||
|
|
||||||
FusionDictionary newDict = new FusionDictionary(root,
|
FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions);
|
||||||
new FusionDictionary.DictionaryOptions(options,
|
|
||||||
0 != (optionsFlags & GERMAN_UMLAUT_PROCESSING_FLAG),
|
|
||||||
0 != (optionsFlags & FRENCH_LIGATURE_PROCESSING_FLAG)));
|
|
||||||
if (null != dict) {
|
if (null != dict) {
|
||||||
for (final Word w : dict) {
|
for (final Word w : dict) {
|
||||||
if (w.mIsBlacklistEntry) {
|
if (w.mIsBlacklistEntry) {
|
||||||
|
|
|
@ -52,6 +52,9 @@ public class BinaryDictIOTests extends AndroidTestCase {
|
||||||
private static final int BIGRAM_FREQ = 50;
|
private static final int BIGRAM_FREQ = 50;
|
||||||
private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
|
private static final int TOLERANCE_OF_BIGRAM_FREQ = 5;
|
||||||
|
|
||||||
|
private static final BinaryDictInputOutput.FormatOptions VERSION2 =
|
||||||
|
new BinaryDictInputOutput.FormatOptions(2);
|
||||||
|
|
||||||
private static final String[] CHARACTERS =
|
private static final String[] CHARACTERS =
|
||||||
{
|
{
|
||||||
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
|
"a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m",
|
||||||
|
@ -112,7 +115,7 @@ public class BinaryDictIOTests extends AndroidTestCase {
|
||||||
final FileOutputStream out = new FileOutputStream(file);
|
final FileOutputStream out = new FileOutputStream(file);
|
||||||
|
|
||||||
now = System.currentTimeMillis();
|
now = System.currentTimeMillis();
|
||||||
BinaryDictInputOutput.writeDictionaryBinary(out, dict, 2);
|
BinaryDictInputOutput.writeDictionaryBinary(out, dict, VERSION2);
|
||||||
diff = System.currentTimeMillis() - now;
|
diff = System.currentTimeMillis() - now;
|
||||||
|
|
||||||
out.flush();
|
out.flush();
|
||||||
|
|
|
@ -18,6 +18,7 @@ package com.android.inputmethod.latin;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface;
|
import com.android.inputmethod.latin.UserHistoryDictIOUtils.BigramDictionaryInterface;
|
||||||
import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener;
|
import com.android.inputmethod.latin.UserHistoryDictIOUtils.OnAddWordListener;
|
||||||
|
import com.android.inputmethod.latin.makedict.BinaryDictInputOutput;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||||
|
|
||||||
|
@ -44,6 +45,8 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
|
||||||
private static final int UNIGRAM_FREQUENCY = 50;
|
private static final int UNIGRAM_FREQUENCY = 50;
|
||||||
private static final int BIGRAM_FREQUENCY = 100;
|
private static final int BIGRAM_FREQUENCY = 100;
|
||||||
private static final ArrayList<String> NOT_HAVE_BIGRAM = new ArrayList<String>();
|
private static final ArrayList<String> NOT_HAVE_BIGRAM = new ArrayList<String>();
|
||||||
|
private static final BinaryDictInputOutput.FormatOptions FORMAT_OPTIONS =
|
||||||
|
new BinaryDictInputOutput.FormatOptions(2);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return same frequency for all words and bigrams
|
* Return same frequency for all words and bigrams
|
||||||
|
@ -132,7 +135,7 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
|
||||||
final UserHistoryDictionaryBigramList bigramList) {
|
final UserHistoryDictionaryBigramList bigramList) {
|
||||||
try {
|
try {
|
||||||
final FileOutputStream out = new FileOutputStream(file);
|
final FileOutputStream out = new FileOutputStream(file);
|
||||||
UserHistoryDictIOUtils.writeDictionaryBinary(out, this, bigramList, 2);
|
UserHistoryDictIOUtils.writeDictionaryBinary(out, this, bigramList, FORMAT_OPTIONS);
|
||||||
out.flush();
|
out.flush();
|
||||||
out.close();
|
out.close();
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
|
|
@ -317,8 +317,10 @@ public class DictionaryMaker {
|
||||||
final FusionDictionary dict, final int version)
|
final FusionDictionary dict, final int version)
|
||||||
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
||||||
final File outputFile = new File(outputFilename);
|
final File outputFile = new File(outputFilename);
|
||||||
|
final BinaryDictInputOutput.FormatOptions formatOptions =
|
||||||
|
new BinaryDictInputOutput.FormatOptions(version);
|
||||||
BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict,
|
BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict,
|
||||||
version);
|
formatOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
Loading…
Reference in New Issue