Merge "Change binary dictionary output buffer size to match dictionary size."

2012-04-19 17:18:59 -07:00 · 2012-04-19 17:18:59 -07:00 · 7cdb560513
commit 7cdb560513
parent 9d9b44dc67 df7ebbbd61
1 changed files with 65 additions and 32 deletions
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@ -21,6 +21,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions
 import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;

+import java.io.ByteArrayOutputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.OutputStream;
@ -271,6 +272,29 @@ public class BinaryDictInputOutput {
            return index - origin;
        }

+        /**
+         * Writes a string with our character format to a ByteArrayOutputStream.
+         *
+         * One byte per code point when getCharSize() is 1, else three bytes, high byte first.
+         * The GROUP_CHARACTERS_TERMINATOR byte is always written after the last code point.
+         * @param buffer the ByteArrayOutputStream to append to.
+         * @param word the string to write.
+         */
+        private static void writeString(ByteArrayOutputStream buffer, final String word) {
+            final int length = word.length(); // in chars; the loop steps by code point
+            for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
+                final int codePoint = word.codePointAt(i);
+                if (1 == getCharSize(codePoint)) {
+                    buffer.write((byte) codePoint); // write(int) keeps only the low 8 bits
+                } else {
+                    buffer.write((byte) (0xFF & (codePoint >> 16))); // most significant byte
+                    buffer.write((byte) (0xFF & (codePoint >> 8)));
+                    buffer.write((byte) (0xFF & codePoint));
+                }
+            }
+            buffer.write(GROUP_CHARACTERS_TERMINATOR);
+        }
+
        /**
         * Reads a string from a RandomAccessFile. This is the converse of the above method.
         */
@ -894,15 +918,11 @@ public class BinaryDictInputOutput {
            final FusionDictionary dict, final int version)
            throws IOException, UnsupportedFormatException {

-        // Addresses are limited to 3 bytes, so we'll just make a 16MB buffer. Since addresses
-        // can be relative to each node, the structure itself is not limited to 16MB at all, but
-        // I doubt this will ever be shot. If it is, deciding the order of the nodes becomes
-        // a quite complicated problem, because though the dictionary itself does not have a
-        // size limit, each node must still be within 16MB of all its children and parents.
-        // As long as this is ensured, the dictionary file may grow to any size.
-        // Anyway, to make a dictionary bigger than 16MB just increase the size of this buffer.
-        final byte[] buffer = new byte[1 << 24];
-        int index = 0;
+        // Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
+        // structure itself is not limited to 16MB. However, once it exceeds 16MB, deciding the
+        // order of the nodes becomes quite a complicated problem: although the dictionary itself
+        // has no size limit, each node must still be within 16MB of all its children and
+        // parents. As long as this is ensured, the dictionary file may grow to any size.

        if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
            throw new UnsupportedFormatException("Requested file format version " + version
@ -910,47 +930,54 @@ public class BinaryDictInputOutput {
                    + MINIMUM_SUPPORTED_VERSION + " through " + MAXIMUM_SUPPORTED_VERSION);
        }

+        ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream(256);
+
        // The magic number in big-endian order.
        if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
            // Magic number for version 2+.
-            buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24));
-            buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16));
-            buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8));
-            buffer[index++] = (byte) (0xFF & VERSION_2_MAGIC_NUMBER);
+            headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24)));
+            headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16)));
+            headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8)));
+            headerBuffer.write((byte) (0xFF & VERSION_2_MAGIC_NUMBER));
            // Dictionary version.
-            buffer[index++] = (byte) (0xFF & (version >> 8));
-            buffer[index++] = (byte) (0xFF & version);
+            headerBuffer.write((byte) (0xFF & (version >> 8)));
+            headerBuffer.write((byte) (0xFF & version));
        } else {
            // Magic number for version 1.
-            buffer[index++] = (byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8));
-            buffer[index++] = (byte) (0xFF & VERSION_1_MAGIC_NUMBER);
+            headerBuffer.write((byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8)));
+            headerBuffer.write((byte) (0xFF & VERSION_1_MAGIC_NUMBER));
            // Dictionary version.
-            buffer[index++] = (byte) (0xFF & version);
+            headerBuffer.write((byte) (0xFF & version));
        }
        // Options flags
        final int options = makeOptionsValue(dict.mOptions);
-        buffer[index++] = (byte) (0xFF & (options >> 8));
-        buffer[index++] = (byte) (0xFF & options);
+        headerBuffer.write((byte) (0xFF & (options >> 8)));
+        headerBuffer.write((byte) (0xFF & options));
        if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
-            final int headerSizeOffset = index;
-            index += 4; // Size of the header size
-
+            final int headerSizeOffset = headerBuffer.size();
+            // Placeholder to be written later with header size.
+            for (int i = 0; i < 4; ++i) {
+                headerBuffer.write(0);
+            }
            // Write out the options.
            for (final String key : dict.mOptions.mAttributes.keySet()) {
                final String value = dict.mOptions.mAttributes.get(key);
-                index += CharEncoding.writeString(buffer, index, key);
-                index += CharEncoding.writeString(buffer, index, value);
+                CharEncoding.writeString(headerBuffer, key);
+                CharEncoding.writeString(headerBuffer, value);
            }
-
+            final int size = headerBuffer.size();
+            final byte[] bytes = headerBuffer.toByteArray();
            // Write out the header size.
-            buffer[headerSizeOffset] = (byte) (0xFF & (index >> 24));
-            buffer[headerSizeOffset + 1] = (byte) (0xFF & (index >> 16));
-            buffer[headerSizeOffset + 2] = (byte) (0xFF & (index >> 8));
-            buffer[headerSizeOffset + 3] = (byte) (0xFF & (index >> 0));
+            bytes[headerSizeOffset] = (byte) (0xFF & (size >> 24));
+            bytes[headerSizeOffset + 1] = (byte) (0xFF & (size >> 16));
+            bytes[headerSizeOffset + 2] = (byte) (0xFF & (size >> 8));
+            bytes[headerSizeOffset + 3] = (byte) (0xFF & (size >> 0));
+            destination.write(bytes);
+        } else {
+            headerBuffer.writeTo(destination);
        }

-        destination.write(buffer, 0, index);
-        index = 0;
+        headerBuffer.close();

        // Leave the choice of the optimal node order to the flattenTree function.
        MakedictLog.i("Flattening the tree...");
@ -961,6 +988,12 @@ public class BinaryDictInputOutput {
        MakedictLog.i("Checking array...");
        checkFlatNodeArray(flatNodes);

+        // Create a buffer that matches the final dictionary size.
+        final Node lastNode = flatNodes.get(flatNodes.size() - 1);
+        final int bufferSize =(lastNode.mCachedAddress + lastNode.mCachedSize);
+        final byte[] buffer = new byte[bufferSize];
+        int index = 0;
+
        MakedictLog.i("Writing file...");
        int dataEndOffset = 0;
        for (Node n : flatNodes) {