Merge "Change binary dictionary output buffer size to match dictionary size."

2012-04-19 17:18:59 -07:00 · 2012-04-19 17:18:59 -07:00 · 7cdb560513
commit 7cdb560513
parent 9d9b44dc67 df7ebbbd61
1 changed files with 65 additions and 32 deletions
--- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
+++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java
@ -21,6 +21,7 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions
 import com.android.inputmethod.latin.makedict.FusionDictionary.Node;
 import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
 import java.io.ByteArrayOutputStream;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.OutputStream;
@ -271,6 +272,29 @@ public class BinaryDictInputOutput {
            return index - origin;
        }
        /**
         * Encodes a string into a ByteArrayOutputStream using our character format.
         *
         * Every code point of the string is emitted in turn: as a single byte when
         * getCharSize reports a size of 1 for it, and as three bytes, most significant
         * first, otherwise. The terminator byte is appended after the last character.
         *
         * @param buffer the ByteArrayOutputStream receiving the encoded bytes.
         * @param word the string to encode.
         */
        private static void writeString(ByteArrayOutputStream buffer, final String word) {
            final int charCount = word.length();
            int position = 0;
            while (position < charCount) {
                final int character = word.codePointAt(position);
                if (1 != getCharSize(character)) {
                    // Wide form: bits 23..16, then 15..8, then 7..0.
                    final int high = 0xFF & (character >> 16);
                    final int middle = 0xFF & (character >> 8);
                    final int low = 0xFF & character;
                    buffer.write((byte) high);
                    buffer.write((byte) middle);
                    buffer.write((byte) low);
                } else {
                    // Narrow form: the code point fits in one byte.
                    buffer.write((byte) character);
                }
                // Step over one code point; a surrogate pair occupies two chars.
                position += Character.charCount(character);
            }
            buffer.write(GROUP_CHARACTERS_TERMINATOR);
        }
        /**
         * Reads a string from a RandomAccessFile. This is the converse of the above method.
         */
@ -894,15 +918,11 @@ public class BinaryDictInputOutput {
            final FusionDictionary dict, final int version)
            throws IOException, UnsupportedFormatException {
-        // Addresses are limited to 3 bytes, so we'll just make a 16MB buffer. Since addresses
+        // Addresses are limited to 3 bytes, but since addresses can be relative to each node, the
-        // can be relative to each node, the structure itself is not limited to 16MB at all, but
+        // structure itself is not limited to 16MB. However, if it is over 16MB deciding the order
-        // I doubt this will ever be shot. If it is, deciding the order of the nodes becomes
+        // of the nodes becomes a quite complicated problem, because though the dictionary itself
-        // a quite complicated problem, because though the dictionary itself does not have a
+        // does not have a size limit, each node must still be within 16MB of all its children and
-        // size limit, each node must still be within 16MB of all its children and parents.
+        // parents. As long as this is ensured, the dictionary file may grow to any size.
        // As long as this is ensured, the dictionary file may grow to any size.
        // Anyway, to make a dictionary bigger than 16MB just increase the size of this buffer.
        final byte[] buffer = new byte[1 << 24];
        int index = 0;
        if (version < MINIMUM_SUPPORTED_VERSION || version > MAXIMUM_SUPPORTED_VERSION) {
            throw new UnsupportedFormatException("Requested file format version " + version
@ -910,47 +930,54 @@ public class BinaryDictInputOutput {
                    + MINIMUM_SUPPORTED_VERSION + " through " + MAXIMUM_SUPPORTED_VERSION);
        }
        ByteArrayOutputStream headerBuffer = new ByteArrayOutputStream(256);
        // The magic number in big-endian order.
        if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
            // Magic number for version 2+.
-            buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24));
+            headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 24)));
-            buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16));
+            headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 16)));
-            buffer[index++] = (byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8));
+            headerBuffer.write((byte) (0xFF & (VERSION_2_MAGIC_NUMBER >> 8)));
-            buffer[index++] = (byte) (0xFF & VERSION_2_MAGIC_NUMBER);
+            headerBuffer.write((byte) (0xFF & VERSION_2_MAGIC_NUMBER));
            // Dictionary version.
-            buffer[index++] = (byte) (0xFF & (version >> 8));
+            headerBuffer.write((byte) (0xFF & (version >> 8)));
-            buffer[index++] = (byte) (0xFF & version);
+            headerBuffer.write((byte) (0xFF & version));
        } else {
            // Magic number for version 1.
-            buffer[index++] = (byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8));
+            headerBuffer.write((byte) (0xFF & (VERSION_1_MAGIC_NUMBER >> 8)));
-            buffer[index++] = (byte) (0xFF & VERSION_1_MAGIC_NUMBER);
+            headerBuffer.write((byte) (0xFF & VERSION_1_MAGIC_NUMBER));
            // Dictionary version.
-            buffer[index++] = (byte) (0xFF & version);
+            headerBuffer.write((byte) (0xFF & version));
        }
        // Options flags
        final int options = makeOptionsValue(dict.mOptions);
-        buffer[index++] = (byte) (0xFF & (options >> 8));
+        headerBuffer.write((byte) (0xFF & (options >> 8)));
-        buffer[index++] = (byte) (0xFF & options);
+        headerBuffer.write((byte) (0xFF & options));
        if (version >= FIRST_VERSION_WITH_HEADER_SIZE) {
-            final int headerSizeOffset = index;
+            final int headerSizeOffset = headerBuffer.size();
-            index += 4; // Size of the header size
+            // Placeholder to be written later with header size.
-
+            for (int i = 0; i < 4; ++i) {
                headerBuffer.write(0);
            }
            // Write out the options.
            for (final String key : dict.mOptions.mAttributes.keySet()) {
                final String value = dict.mOptions.mAttributes.get(key);
-                index += CharEncoding.writeString(buffer, index, key);
+                CharEncoding.writeString(headerBuffer, key);
-                index += CharEncoding.writeString(buffer, index, value);
+                CharEncoding.writeString(headerBuffer, value);
            }
-
+            final int size = headerBuffer.size();
            final byte[] bytes = headerBuffer.toByteArray();
            // Write out the header size.
-            buffer[headerSizeOffset] = (byte) (0xFF & (index >> 24));
+            bytes[headerSizeOffset] = (byte) (0xFF & (size >> 24));
-            buffer[headerSizeOffset + 1] = (byte) (0xFF & (index >> 16));
+            bytes[headerSizeOffset + 1] = (byte) (0xFF & (size >> 16));
-            buffer[headerSizeOffset + 2] = (byte) (0xFF & (index >> 8));
+            bytes[headerSizeOffset + 2] = (byte) (0xFF & (size >> 8));
-            buffer[headerSizeOffset + 3] = (byte) (0xFF & (index >> 0));
+            bytes[headerSizeOffset + 3] = (byte) (0xFF & (size >> 0));
            destination.write(bytes);
        } else {
            headerBuffer.writeTo(destination);
        }
-        destination.write(buffer, 0, index);
+        headerBuffer.close();
        index = 0;
        // Leave the choice of the optimal node order to the flattenTree function.
        MakedictLog.i("Flattening the tree...");
@ -961,6 +988,12 @@ public class BinaryDictInputOutput {
        MakedictLog.i("Checking array...");
        checkFlatNodeArray(flatNodes);
        // Create a buffer that matches the final dictionary size.
        final Node lastNode = flatNodes.get(flatNodes.size() - 1);
        final int bufferSize =(lastNode.mCachedAddress + lastNode.mCachedSize);
        final byte[] buffer = new byte[bufferSize];
        int index = 0;
        MakedictLog.i("Writing file...");
        int dataEndOffset = 0;
        for (Node n : flatNodes) {