From df7ebbbd616fa5aff569d00b16cd3f85ddf2da6d Mon Sep 17 00:00:00 2001 From: Tom Ouyang Date: Wed, 18 Apr 2012 14:15:34 -0700 Subject: [PATCH] Change binary dictionary output buffer size to match dictionary size. Bug: 6355943 Change-Id: Iaab7bc16ba0dbc7bfde70b06e7bd355519838831 --- .../latin/makedict/BinaryDictInputOutput.java | 97 +++++++++++++------ 1 file changed, 65 insertions(+), 32 deletions(-) diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index d22332116..97df98e34 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -21,6 +21,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import java.io.ByteArrayOutputStream; import java.io.FileNotFoundException; import java.io.IOException; import java.io.OutputStream; @@ -271,6 +272,29 @@ public class BinaryDictInputOutput { return index - origin; } + /** + * Writes a string with our character format to a ByteArrayOutputStream. + * + * This will also write the terminator byte. + * + * @param buffer the ByteArrayOutputStream to write to. + * @param word the string to write. + */ + private static void writeString(ByteArrayOutputStream buffer, final String word) { + final int length = word.length(); + for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { + final int codePoint = word.codePointAt(i); + if (1 == getCharSize(codePoint)) { + buffer.write((byte) codePoint); + } else { + buffer.write((byte) (0xFF & (codePoint >> 16))); + buffer.write((byte) (0xFF & (codePoint >> 8))); + buffer.write((byte) (0xFF & codePoint)); + } + } + buffer.write(GROUP_CHARACTERS_TERMINATOR); + } + /** * Reads a string from a RandomAccessFile. This is the converse of the above method. */ @@ -894,15 +918,11 @@ public class BinaryDictInputOutput { final FusionDictionary dict, final int version) throws IOException, UnsupportedFormatException { - // Addresses are limited to 3 bytes, so we'll just make a 16MB buffer. Since addresses - // can be relative to each node, the structure itself is not limited to 16MB at all, but - // I doubt this will ever be shot. If it is, deciding the order of the nodes becomes - // a quite complicated problem, because though the dictionary itself does not have a - // size limit, each node must still be within 16MB of all its children and parents. - // As long as this is ensured, the dictionary file may grow to any size. - // Anyway, to make a dictionary bigger than 16MB just increase the size of this buffer. - final byte[] buffer = new byte[1 << 24]; - int index = 0; + // Addresses are limited to 3 bytes, but since addresses can be relative to each node, the + // structure itself is not limited to 16MB. However, if it is over 16MB deciding the order + // of the nodes becomes a quite complicated problem, because though the dictionary itself + // does not have a size limit, each node must still be within 16MB of all its children and + // parents. As long as this is ensured, the dictionary file may grow to any size. if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) { throw new UnsupportedFormatException("Requested file format version " + version @@ -910,47 +930,54 @@ public class BinaryDictInputOutput { + MINIMUM_SUPPORTED_VERSION + " through " + MAXIMUM_SUPPORTED_VERSION); } + ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream(256); + // The magic number in big-endian order. if (version >= FIRST_VERSION_WITH_HEADER_SIZE) { // Magic number for version 2+. - buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24)); - buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16)); - buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8)); - buffer[index++] = (byte) (0xFF & VERSION_2_MAGIC_NUMBER); + headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24))); + headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16))); + headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8))); + headerBuffer.write((byte) (0xFF & VERSION_2_MAGIC_NUMBER)); // Dictionary version. - buffer[index++] = (byte) (0xFF & (version >> 8)); - buffer[index++] = (byte) (0xFF & version); + headerBuffer.write((byte) (0xFF & (version >> 8))); + headerBuffer.write((byte) (0xFF & version)); } else { // Magic number for version 1. - buffer[index++] = (byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8)); - buffer[index++] = (byte) (0xFF & VERSION_1_MAGIC_NUMBER); + headerBuffer.write((byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8))); + headerBuffer.write((byte) (0xFF & VERSION_1_MAGIC_NUMBER)); // Dictionary version. - buffer[index++] = (byte) (0xFF & version); + headerBuffer.write((byte) (0xFF & version)); } // Options flags final int options = makeOptionsValue(dict.mOptions); - buffer[index++] = (byte) (0xFF & (options >> 8)); - buffer[index++] = (byte) (0xFF & options); + headerBuffer.write((byte) (0xFF & (options >> 8))); + headerBuffer.write((byte) (0xFF & options)); if (version >= FIRST_VERSION_WITH_HEADER_SIZE) { - final int headerSizeOffset = index; - index += 4; // Size of the header size - + final int headerSizeOffset = headerBuffer.size(); + // Placeholder to be written later with header size. + for (int i = 0; i < 4; ++i) { + headerBuffer.write(0); + } // Write out the options. for (final String key : dict.mOptions.mAttributes.keySet()) { final String value = dict.mOptions.mAttributes.get(key); - index += CharEncoding.writeString(buffer, index, key); - index += CharEncoding.writeString(buffer, index, value); + CharEncoding.writeString(headerBuffer, key); + CharEncoding.writeString(headerBuffer, value); } - + final int size = headerBuffer.size(); + final byte[] bytes = headerBuffer.toByteArray(); // Write out the header size. - buffer[headerSizeOffset] = (byte) (0xFF & (index >> 24)); - buffer[headerSizeOffset + 1] = (byte) (0xFF & (index >> 16)); - buffer[headerSizeOffset + 2] = (byte) (0xFF & (index >> 8)); - buffer[headerSizeOffset + 3] = (byte) (0xFF & (index >> 0)); + bytes[headerSizeOffset] = (byte) (0xFF & (size >> 24)); + bytes[headerSizeOffset + 1] = (byte) (0xFF & (size >> 16)); + bytes[headerSizeOffset + 2] = (byte) (0xFF & (size >> 8)); + bytes[headerSizeOffset + 3] = (byte) (0xFF & (size >> 0)); + destination.write(bytes); + } else { + headerBuffer.writeTo(destination); } - destination.write(buffer, 0, index); - index = 0; + headerBuffer.close(); // Leave the choice of the optimal node order to the flattenTree function. MakedictLog.i("Flattening the tree..."); @@ -961,6 +988,12 @@ public class BinaryDictInputOutput { MakedictLog.i("Checking array..."); checkFlatNodeArray(flatNodes); + // Create a buffer that matches the final dictionary size. + final Node lastNode = flatNodes.get(flatNodes.size() - 1); + final int bufferSize =(lastNode.mCachedAddress + lastNode.mCachedSize); + final byte[] buffer = new byte[bufferSize]; + int index = 0; + MakedictLog.i("Writing file..."); int dataEndOffset = 0; for (Node n : flatNodes) {