Merge "Change binary dictionary output buffer size to match dictionary size."

This commit is contained in:
Tom Ouyang 2012-04-19 17:18:59 -07:00 committed by Android (Google) Code Review
commit 7cdb560513

View file

@ -21,6 +21,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions
import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.io.ByteArrayOutputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.OutputStream; import java.io.OutputStream;
@ -271,6 +272,29 @@ public class BinaryDictInputOutput {
return index - origin; return index - origin;
} }
/**
 * Serializes a word into a ByteArrayOutputStream using the dictionary character format,
 * followed by the terminator byte.
 *
 * The word is walked one code point at a time. A code point for which getCharSize()
 * reports a size of 1 is emitted as a single byte; any other code point is emitted as
 * three bytes, most significant first (bits 16-23, then 8-15, then 0-7). Once every code
 * point has been emitted, GROUP_CHARACTERS_TERMINATOR is appended.
 *
 * @param buffer the ByteArrayOutputStream that receives the encoded bytes.
 * @param word the string to encode.
 */
private static void writeString(ByteArrayOutputStream buffer, final String word) {
    int offset = 0;
    while (offset < word.length()) {
        final int cp = word.codePointAt(offset);
        // Step over one full code point (two chars for a surrogate pair, one otherwise).
        offset += Character.charCount(cp);
        if (getCharSize(cp) != 1) {
            // Multi-byte form: the two high-order bytes precede the low byte.
            buffer.write((cp >> 16) & 0xFF);
            buffer.write((cp >> 8) & 0xFF);
        }
        // ByteArrayOutputStream.write(int) stores only the low eight bits, so this single
        // call serves both the one-byte form and the last byte of the three-byte form.
        buffer.write(cp & 0xFF);
    }
    buffer.write(GROUP_CHARACTERS_TERMINATOR);
}
/** /**
* Reads a string from a RandomAccessFile. This is the converse of the above method. * Reads a string from a RandomAccessFile. This is the converse of the above method.
*/ */
@ -894,15 +918,11 @@ public class BinaryDictInputOutput {
final FusionDictionary dict, final int version) final FusionDictionary dict, final int version)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
// Addresses are limited to 3 bytes, so we'll just make a 16MB buffer. Since addresses // Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
// can be relative to each node, the structure itself is not limited to 16MB at all, but // structure itself is not limited to 16MB. However, if it is over 16MB deciding the order
// I doubt this will ever be shot. If it is, deciding the order of the nodes becomes // of the nodes becomes a quite complicated problem, because though the dictionary itself
// a quite complicated problem, because though the dictionary itself does not have a // does not have a size limit, each node must still be within 16MB of all its children and
// size limit, each node must still be within 16MB of all its children and parents. // parents. As long as this is ensured, the dictionary file may grow to any size.
// As long as this is ensured, the dictionary file may grow to any size.
// Anyway, to make a dictionary bigger than 16MB just increase the size of this buffer.
final byte[] buffer = new byte[1 << 24];
int index = 0;
if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) { if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
throw new UnsupportedFormatException("Requested file format version " + version throw new UnsupportedFormatException("Requested file format version " + version
@ -910,47 +930,54 @@ public class BinaryDictInputOutput {
+ MINIMUM_SUPPORTED_VERSION + " through " + MAXIMUM_SUPPORTED_VERSION); + MINIMUM_SUPPORTED_VERSION + " through " + MAXIMUM_SUPPORTED_VERSION);
} }
ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream(256);
// The magic number in big-endian order. // The magic number in big-endian order.
if (version >= FIRST_VERSION_WITH_HEADER_SIZE) { if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
// Magic number for version 2+. // Magic number for version 2+.
buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24)); headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24)));
buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16)); headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16)));
buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8)); headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8)));
buffer[index++] = (byte) (0xFF & VERSION_2_MAGIC_NUMBER); headerBuffer.write((byte) (0xFF & VERSION_2_MAGIC_NUMBER));
// Dictionary version. // Dictionary version.
buffer[index++] = (byte) (0xFF & (version >> 8)); headerBuffer.write((byte) (0xFF & (version >> 8)));
buffer[index++] = (byte) (0xFF & version); headerBuffer.write((byte) (0xFF & version));
} else { } else {
// Magic number for version 1. // Magic number for version 1.
buffer[index++] = (byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8)); headerBuffer.write((byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8)));
buffer[index++] = (byte) (0xFF & VERSION_1_MAGIC_NUMBER); headerBuffer.write((byte) (0xFF & VERSION_1_MAGIC_NUMBER));
// Dictionary version. // Dictionary version.
buffer[index++] = (byte) (0xFF & version); headerBuffer.write((byte) (0xFF & version));
} }
// Options flags // Options flags
final int options = makeOptionsValue(dict.mOptions); final int options = makeOptionsValue(dict.mOptions);
buffer[index++] = (byte) (0xFF & (options >> 8)); headerBuffer.write((byte) (0xFF & (options >> 8)));
buffer[index++] = (byte) (0xFF & options); headerBuffer.write((byte) (0xFF & options));
if (version >= FIRST_VERSION_WITH_HEADER_SIZE) { if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
final int headerSizeOffset = index; final int headerSizeOffset = headerBuffer.size();
index += 4; // Size of the header size // Placeholder to be written later with header size.
for (int i = 0; i < 4; ++i) {
headerBuffer.write(0);
}
// Write out the options. // Write out the options.
for (final String key : dict.mOptions.mAttributes.keySet()) { for (final String key : dict.mOptions.mAttributes.keySet()) {
final String value = dict.mOptions.mAttributes.get(key); final String value = dict.mOptions.mAttributes.get(key);
index += CharEncoding.writeString(buffer, index, key); CharEncoding.writeString(headerBuffer, key);
index += CharEncoding.writeString(buffer, index, value); CharEncoding.writeString(headerBuffer, value);
} }
final int size = headerBuffer.size();
final byte[] bytes = headerBuffer.toByteArray();
// Write out the header size. // Write out the header size.
buffer[headerSizeOffset] = (byte) (0xFF & (index >> 24)); bytes[headerSizeOffset] = (byte) (0xFF & (size >> 24));
buffer[headerSizeOffset + 1] = (byte) (0xFF & (index >> 16)); bytes[headerSizeOffset + 1] = (byte) (0xFF & (size >> 16));
buffer[headerSizeOffset + 2] = (byte) (0xFF & (index >> 8)); bytes[headerSizeOffset + 2] = (byte) (0xFF & (size >> 8));
buffer[headerSizeOffset + 3] = (byte) (0xFF & (index >> 0)); bytes[headerSizeOffset + 3] = (byte) (0xFF & (size >> 0));
destination.write(bytes);
} else {
headerBuffer.writeTo(destination);
} }
destination.write(buffer, 0, index); headerBuffer.close();
index = 0;
// Leave the choice of the optimal node order to the flattenTree function. // Leave the choice of the optimal node order to the flattenTree function.
MakedictLog.i("Flattening the tree..."); MakedictLog.i("Flattening the tree...");
@ -961,6 +988,12 @@ public class BinaryDictInputOutput {
MakedictLog.i("Checking array..."); MakedictLog.i("Checking array...");
checkFlatNodeArray(flatNodes); checkFlatNodeArray(flatNodes);
// Create a buffer that matches the final dictionary size.
final Node lastNode = flatNodes.get(flatNodes.size() - 1);
final int bufferSize =(lastNode.mCachedAddress + lastNode.mCachedSize);
final byte[] buffer = new byte[bufferSize];
int index = 0;
MakedictLog.i("Writing file..."); MakedictLog.i("Writing file...");
int dataEndOffset = 0; int dataEndOffset = 0;
for (Node n : flatNodes) { for (Node n : flatNodes) {