From 117f18e844f83036f3523aa2e721894bac16739d Mon Sep 17 00:00:00 2001 From: Ken Wakasa Date: Thu, 15 Aug 2013 08:54:29 +0000 Subject: [PATCH] Revert "[Refactor] Divide BinaryDictInputOutput into BinaryDictInputUtils and BinaryDictOutputUtils." This reverts commit 4c63d0614e7ed7aea4bcbab3a17090d841661d92. Change-Id: I1fa277d720bab4d895259df7d6d82eebfa5eb6c5 --- .../latin/BinaryDictionaryGetter.java | 8 +- .../inputmethod/latin/DictionaryWriter.java | 4 +- .../latin/makedict/BinaryDictIOUtils.java | 68 +- ...tUtils.java => BinaryDictInputOutput.java} | 828 +++++++++++++++++- .../latin/makedict/BinaryDictInputUtils.java | 811 ----------------- .../latin/makedict/BinaryDictReader.java | 6 +- .../makedict/DynamicBinaryDictIOUtils.java | 30 +- .../latin/utils/ByteArrayWrapper.java | 2 +- .../latin/utils/UserHistoryDictIOUtils.java | 7 +- .../core/dictionary/probability_utils.h | 2 +- .../latin/makedict/BinaryDictIOTests.java | 14 +- .../makedict/BinaryDictIOUtilsTests.java | 20 +- .../latin/makedict/BinaryDictReaderTests.java | 2 +- .../dicttool/BinaryDictOffdeviceUtils.java | 6 +- .../latin/dicttool/DictionaryMaker.java | 11 +- .../BinaryDictOffdeviceUtilsTests.java | 7 +- .../makedict/BinaryDictInputOutputTest.java | 4 +- .../latin/makedict/FusionDictionaryTest.java | 2 +- 18 files changed, 892 insertions(+), 940 deletions(-) rename java/src/com/android/inputmethod/latin/makedict/{BinaryDictOutputUtils.java => BinaryDictInputOutput.java} (57%) delete mode 100644 java/src/com/android/inputmethod/latin/makedict/BinaryDictInputUtils.java diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java index d1d829ad1..d0a4afd50 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java @@ -21,7 +21,7 @@ import android.content.SharedPreferences; import android.content.res.AssetFileDescriptor; import android.util.Log; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.DictionaryInfoUtils; @@ -231,8 +231,8 @@ final public class BinaryDictionaryGetter { try { // Read the version of the file inStream = new FileInputStream(f); - final BinaryDictInputUtils.ByteBufferWrapper buffer = - new BinaryDictInputUtils.ByteBufferWrapper(inStream.getChannel().map( + final BinaryDictInputOutput.ByteBufferWrapper buffer = + new BinaryDictInputOutput.ByteBufferWrapper(inStream.getChannel().map( FileChannel.MapMode.READ_ONLY, 0, f.length())); final int magic = buffer.readInt(); if (magic != FormatSpec.MAGIC_NUMBER) { @@ -241,7 +241,7 @@ final public class BinaryDictionaryGetter { final int formatVersion = buffer.readInt(); final int headerSize = buffer.readInt(); final HashMap options = CollectionUtils.newHashMap(); - BinaryDictInputUtils.populateOptions(buffer, headerSize, options); + BinaryDictInputOutput.populateOptions(buffer, headerSize, options); final String version = options.get(VERSION_KEY); if (null == version) { diff --git a/java/src/com/android/inputmethod/latin/DictionaryWriter.java b/java/src/com/android/inputmethod/latin/DictionaryWriter.java index ee1be4c10..47151bf61 100644 --- a/java/src/com/android/inputmethod/latin/DictionaryWriter.java +++ b/java/src/com/android/inputmethod/latin/DictionaryWriter.java @@ -20,7 +20,7 @@ import android.content.Context; import com.android.inputmethod.keyboard.ProximityInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; -import com.android.inputmethod.latin.makedict.BinaryDictOutputUtils; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; @@ -87,7 +87,7 @@ public class DictionaryWriter extends AbstractDictionaryWriter { @Override protected void writeBinaryDictionary(final FileOutputStream out) throws IOException, UnsupportedFormatException { - BinaryDictOutputUtils.writeDictionaryBinary(out, mFusionDictionary, FORMAT_OPTIONS); + BinaryDictInputOutput.writeDictionaryBinary(out, mFusionDictionary, FORMAT_OPTIONS); } @Override diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index d962bf8ab..2e6c4b2f8 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -18,8 +18,8 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils.CharEncoding; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.CharEncoding; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; @@ -58,22 +58,6 @@ public final class BinaryDictIOUtils { } } - static int getChildrenAddressSize(final int optionFlags, - final FormatOptions formatOptions) { - if (formatOptions.mSupportsDynamicUpdate) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; - switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) { - case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: - return 1; - case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: - return 2; - case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: - return 3; - case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS: - default: - return 0; - } - } - /** * Tours all node without recursive call. */ @@ -102,7 +86,7 @@ public final class BinaryDictIOUtils { if (index != p.mLength) index = p.mLength; if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) { - p.mNumOfCharGroup = BinaryDictInputUtils.readCharGroupCount(buffer); + p.mNumOfCharGroup = BinaryDictInputOutput.readCharGroupCount(buffer); p.mAddress += getGroupCountSize(p.mNumOfCharGroup); p.mPosition = 0; } @@ -110,8 +94,8 @@ public final class BinaryDictIOUtils { stack.pop(); continue; } - CharGroupInfo info = BinaryDictInputUtils.readCharGroup(buffer, p.mAddress - headerSize, - formatOptions); + CharGroupInfo info = BinaryDictInputOutput.readCharGroup(buffer, + p.mAddress - headerSize, formatOptions); for (int i = 0; i < info.mCharacters.length; ++i) { pushedChars[index++] = info.mCharacters[i]; } @@ -169,7 +153,7 @@ public final class BinaryDictIOUtils { final Map> bigrams) throws IOException, UnsupportedFormatException { // Read header - final FileHeader header = BinaryDictInputUtils.readHeader(reader.getBuffer()); + final FileHeader header = BinaryDictInputOutput.readHeader(reader.getBuffer()); readUnigramsAndBigramsBinaryInner(reader.getBuffer(), header.mHeaderSize, words, frequencies, bigrams, header.mFormatOptions); } @@ -190,18 +174,18 @@ public final class BinaryDictIOUtils { if (word == null) return FormatSpec.NOT_VALID_WORD; if (buffer.position() != 0) buffer.position(0); - final FileHeader header = BinaryDictInputUtils.readHeader(buffer); + final FileHeader header = BinaryDictInputOutput.readHeader(buffer); int wordPos = 0; final int wordLen = word.codePointCount(0, word.length()); for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) { if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD; do { - final int charGroupCount = BinaryDictInputUtils.readCharGroupCount(buffer); + final int charGroupCount = BinaryDictInputOutput.readCharGroupCount(buffer); boolean foundNextCharGroup = false; for (int i = 0; i < charGroupCount; ++i) { final int charGroupPos = buffer.position(); - final CharGroupInfo currentInfo = BinaryDictInputUtils.readCharGroup(buffer, + final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer, buffer.position(), header.mFormatOptions); final boolean isMovedGroup = isMovedGroup(currentInfo.mFlags, header.mFormatOptions); @@ -287,7 +271,7 @@ public final class BinaryDictIOUtils { */ private static int writeVariableAddress(final OutputStream destination, final int value) throws IOException { - switch (BinaryDictOutputUtils.getByteSize(value)) { + switch (BinaryDictInputOutput.getByteSize(value)) { case 1: destination.write((byte)value); break; @@ -301,15 +285,15 @@ public final class BinaryDictIOUtils { destination.write((byte)(0xFF & value)); break; } - return BinaryDictOutputUtils.getByteSize(value); + return BinaryDictInputOutput.getByteSize(value); } static void skipCharGroup(final FusionDictionaryBufferInterface buffer, final FormatOptions formatOptions) { final int flags = buffer.readUnsignedByte(); - BinaryDictInputUtils.readParentAddress(buffer, formatOptions); + BinaryDictInputOutput.readParentAddress(buffer, formatOptions); skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); - BinaryDictInputUtils.readChildrenAddress(buffer, flags, formatOptions); + BinaryDictInputOutput.readChildrenAddress(buffer, flags, formatOptions); if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte(); if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) { final int shortcutsSize = buffer.readUnsignedShort(); @@ -427,14 +411,14 @@ public final class BinaryDictIOUtils { if (info.mShortcutTargets != null && info.mShortcutTargets.size() > 0) { final int shortcutListSize = - BinaryDictOutputUtils.getShortcutListSize(info.mShortcutTargets); + BinaryDictInputOutput.getShortcutListSize(info.mShortcutTargets); destination.write((byte)(shortcutListSize >> 8)); destination.write((byte)(shortcutListSize & 0xFF)); size += 2; final Iterator shortcutIterator = info.mShortcutTargets.iterator(); while (shortcutIterator.hasNext()) { final WeightedString target = shortcutIterator.next(); - destination.write((byte)BinaryDictOutputUtils.makeShortcutFlags( + destination.write((byte)BinaryDictInputOutput.makeShortcutFlags( shortcutIterator.hasNext(), target.mFrequency)); size++; size += writeString(destination, target.mWord); @@ -443,7 +427,7 @@ public final class BinaryDictIOUtils { if (info.mBigrams != null) { // TODO: Consolidate this code with the code that computes the size of the bigram list - // in BinaryDictionaryOutput#computeActualNodeSize + // in BinaryDictionaryInputOutput#computeActualNodeSize for (int i = 0; i < info.mBigrams.size(); ++i) { final int bigramFrequency = info.mBigrams.get(i).mFrequency; @@ -453,7 +437,7 @@ public final class BinaryDictIOUtils { final int bigramOffset = info.mBigrams.get(i).mAddress - (info.mOriginalAddress + size); bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE : 0; - switch (BinaryDictOutputUtils.getByteSize(bigramOffset)) { + switch (BinaryDictInputOutput.getByteSize(bigramOffset)) { case 1: bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE; break; @@ -477,18 +461,18 @@ public final class BinaryDictIOUtils { */ static int computeGroupSize(final CharGroupInfo info, final FormatOptions formatOptions) { int size = FormatSpec.GROUP_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE - + BinaryDictOutputUtils.getGroupCharactersSize(info.mCharacters) - + getChildrenAddressSize(info.mFlags, formatOptions); + + BinaryDictInputOutput.getGroupCharactersSize(info.mCharacters) + + BinaryDictInputOutput.getChildrenAddressSize(info.mFlags, formatOptions); if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) { size += FormatSpec.GROUP_FREQUENCY_SIZE; } if (info.mShortcutTargets != null && !info.mShortcutTargets.isEmpty()) { - size += BinaryDictOutputUtils.getShortcutListSize(info.mShortcutTargets); + size += BinaryDictInputOutput.getShortcutListSize(info.mShortcutTargets); } if (info.mBigrams != null) { for (final PendingAttribute attr : info.mBigrams) { size += FormatSpec.GROUP_FLAGS_SIZE; - size += BinaryDictOutputUtils.getByteSize(attr.mAddress); + size += BinaryDictInputOutput.getByteSize(attr.mAddress); } } return size; @@ -536,9 +520,9 @@ public final class BinaryDictIOUtils { int position = getTerminalPosition(buffer, word); if (position != FormatSpec.NOT_VALID_WORD) { buffer.position(0); - final FileHeader header = BinaryDictInputUtils.readHeader(buffer); + final FileHeader header = BinaryDictInputOutput.readHeader(buffer); buffer.position(position); - return BinaryDictInputUtils.readCharGroup(buffer, position, header.mFormatOptions); + return BinaryDictInputOutput.readCharGroup(buffer, position, header.mFormatOptions); } return null; } @@ -560,10 +544,10 @@ public final class BinaryDictIOUtils { final FileInputStream inStream = new FileInputStream(file); try { inStream.read(buffer); - final BinaryDictInputUtils.ByteBufferWrapper wrapper = - new BinaryDictInputUtils.ByteBufferWrapper(inStream.getChannel().map( + final BinaryDictInputOutput.ByteBufferWrapper wrapper = + new BinaryDictInputOutput.ByteBufferWrapper(inStream.getChannel().map( FileChannel.MapMode.READ_ONLY, offset, length)); - return BinaryDictInputUtils.readHeader(wrapper); + return BinaryDictInputOutput.readHeader(wrapper); } finally { inStream.close(); } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictOutputUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java similarity index 57% rename from java/src/com/android/inputmethod/latin/makedict/BinaryDictOutputUtils.java rename to java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java index 147234eee..a54661058 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictOutputUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputOutput.java @@ -1,5 +1,5 @@ /* - * Copyright (C) 2013 The Android Open Source Project + * Copyright (C) 2011 The Android Open Source Project * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,8 @@ package com.android.inputmethod.latin.makedict; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils.CharEncoding; +import com.android.inputmethod.annotations.UsedForTesting; +import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; @@ -24,21 +25,26 @@ import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; import java.io.IOException; import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; import java.util.Iterator; +import java.util.Map; +import java.util.TreeMap; /** - * Writes binary files for a FusionDictionary. + * Reads and writes XML files for a FusionDictionary. * * All the methods in this class are static. */ -public class BinaryDictOutputUtils { - - private BinaryDictOutputUtils() { - // This utility class is not publicly instantiable. - } +public final class BinaryDictInputOutput { private static final boolean DBG = MakedictLog.DBG; @@ -48,6 +54,229 @@ public class BinaryDictOutputUtils { // If the number of passes exceeds this number, makedict bails with an exception on // suspicion that a bug might be causing an infinite loop. private static final int MAX_PASSES = 24; + private static final int MAX_JUMPS = 12; + + @UsedForTesting + public interface FusionDictionaryBufferInterface { + public int readUnsignedByte(); + public int readUnsignedShort(); + public int readUnsignedInt24(); + public int readInt(); + public int position(); + public void position(int newPosition); + public void put(final byte b); + public int limit(); + @UsedForTesting + public int capacity(); + } + + public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface { + private ByteBuffer mBuffer; + + public ByteBufferWrapper(final ByteBuffer buffer) { + mBuffer = buffer; + } + + @Override + public int readUnsignedByte() { + return mBuffer.get() & 0xFF; + } + + @Override + public int readUnsignedShort() { + return mBuffer.getShort() & 0xFFFF; + } + + @Override + public int readUnsignedInt24() { + final int retval = readUnsignedByte(); + return (retval << 16) + readUnsignedShort(); + } + + @Override + public int readInt() { + return mBuffer.getInt(); + } + + @Override + public int position() { + return mBuffer.position(); + } + + @Override + public void position(int newPos) { + mBuffer.position(newPos); + } + + @Override + public void put(final byte b) { + mBuffer.put(b); + } + + @Override + public int limit() { + return mBuffer.limit(); + } + + @Override + public int capacity() { + return mBuffer.capacity(); + } + } + + /** + * A class grouping utility function for our specific character encoding. + */ + static final class CharEncoding { + private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; + private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF; + + /** + * Helper method to find out whether this code fits on one byte + */ + private static boolean fitsOnOneByte(final int character) { + return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE + && character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE; + } + + /** + * Compute the size of a character given its character code. + * + * Char format is: + * 1 byte = bbbbbbbb match + * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte + * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because + * unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with + * 00011111 would be outside unicode. + * else: iso-latin-1 code + * This allows for the whole unicode range to be encoded, including chars outside of + * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control + * characters which should never happen anyway (and still work, but take 3 bytes). + * + * @param character the character code. + * @return the size in binary encoded-form, either 1 or 3 bytes. + */ + static int getCharSize(final int character) { + // See char encoding in FusionDictionary.java + if (fitsOnOneByte(character)) return 1; + if (FormatSpec.INVALID_CHARACTER == character) return 1; + return 3; + } + + /** + * Compute the byte size of a character array. + */ + private static int getCharArraySize(final int[] chars) { + int size = 0; + for (int character : chars) size += getCharSize(character); + return size; + } + + /** + * Writes a char array to a byte buffer. + * + * @param codePoints the code point array to write. + * @param buffer the byte buffer to write to. + * @param index the index in buffer to write the character array to. + * @return the index after the last character. + */ + private static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) { + for (int codePoint : codePoints) { + if (1 == getCharSize(codePoint)) { + buffer[index++] = (byte)codePoint; + } else { + buffer[index++] = (byte)(0xFF & (codePoint >> 16)); + buffer[index++] = (byte)(0xFF & (codePoint >> 8)); + buffer[index++] = (byte)(0xFF & codePoint); + } + } + return index; + } + + /** + * Writes a string with our character format to a byte buffer. + * + * This will also write the terminator byte. + * + * @param buffer the byte buffer to write to. + * @param origin the offset to write from. + * @param word the string to write. + * @return the size written, in bytes. + */ + private static int writeString(final byte[] buffer, final int origin, + final String word) { + final int length = word.length(); + int index = origin; + for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { + final int codePoint = word.codePointAt(i); + if (1 == getCharSize(codePoint)) { + buffer[index++] = (byte)codePoint; + } else { + buffer[index++] = (byte)(0xFF & (codePoint >> 16)); + buffer[index++] = (byte)(0xFF & (codePoint >> 8)); + buffer[index++] = (byte)(0xFF & codePoint); + } + } + buffer[index++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR; + return index - origin; + } + + /** + * Writes a string with our character format to a ByteArrayOutputStream. + * + * This will also write the terminator byte. + * + * @param buffer the ByteArrayOutputStream to write to. + * @param word the string to write. + */ + private static void writeString(final ByteArrayOutputStream buffer, final String word) { + final int length = word.length(); + for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { + final int codePoint = word.codePointAt(i); + if (1 == getCharSize(codePoint)) { + buffer.write((byte) codePoint); + } else { + buffer.write((byte) (0xFF & (codePoint >> 16))); + buffer.write((byte) (0xFF & (codePoint >> 8))); + buffer.write((byte) (0xFF & codePoint)); + } + } + buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR); + } + + /** + * Reads a string from a buffer. This is the converse of the above method. + */ + private static String readString(final FusionDictionaryBufferInterface buffer) { + final StringBuilder s = new StringBuilder(); + int character = readChar(buffer); + while (character != FormatSpec.INVALID_CHARACTER) { + s.appendCodePoint(character); + character = readChar(buffer); + } + return s.toString(); + } + + /** + * Reads a character from the buffer. + * + * This follows the character format documented earlier in this source file. + * + * @param buffer the buffer, positioned over an encoded character. + * @return the character code. + */ + static int readChar(final FusionDictionaryBufferInterface buffer) { + int character = buffer.readUnsignedByte(); + if (!fitsOnOneByte(character)) { + if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == character) { + return FormatSpec.INVALID_CHARACTER; + } + character <<= 16; + character += buffer.readUnsignedShort(); + } + return character; + } + } /** * Compute the binary size of the character array. @@ -116,21 +345,6 @@ public class BinaryDictOutputUtils { return size; } - /** - * Compute the size of the header (flag + [parent address] + characters size) of a CharGroup. - * - * @param group the group of which to compute the size of the header - * @param options file format options. - */ - private static int getGroupHeaderSize(final CharGroup group, final FormatOptions options) { - if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { - return FormatSpec.GROUP_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE - + getGroupCharactersSize(group); - } else { - return FormatSpec.GROUP_FLAGS_SIZE + getGroupCharactersSize(group); - } - } - /** * Compute the maximum size of a CharGroup, assuming 3-byte addresses for everything. * @@ -172,6 +386,21 @@ public class BinaryDictOutputUtils { node.mCachedSize = size; } + /** + * Compute the size of the header (flag + [parent address] + characters size) of a CharGroup. + * + * @param group the group of which to compute the size of the header + * @param options file format options. + */ + private static int getGroupHeaderSize(final CharGroup group, final FormatOptions options) { + if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { + return FormatSpec.GROUP_FLAGS_SIZE + FormatSpec.PARENT_ADDRESS_SIZE + + getGroupCharactersSize(group); + } else { + return FormatSpec.GROUP_FLAGS_SIZE + getGroupCharactersSize(group); + } + } + /** * Compute the size, in bytes, that an address will occupy. * @@ -990,4 +1219,557 @@ public class BinaryDictOutputUtils { destination.close(); MakedictLog.i("Done"); } + + + // Input methods: Read a binary dictionary to memory. + // readDictionaryBinary is the public entry point for them. + + static int getChildrenAddressSize(final int optionFlags, + final FormatOptions formatOptions) { + if (formatOptions.mSupportsDynamicUpdate) return FormatSpec.SIGNED_CHILDREN_ADDRESS_SIZE; + switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) { + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: + return 1; + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: + return 2; + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: + return 3; + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS: + default: + return 0; + } + } + + static int readChildrenAddress(final FusionDictionaryBufferInterface buffer, + final int optionFlags, final FormatOptions options) { + if (options.mSupportsDynamicUpdate) { + final int address = buffer.readUnsignedInt24(); + if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; + if ((address & FormatSpec.MSB24) != 0) { + return -(address & FormatSpec.SINT24_MAX); + } else { + return address; + } + } + int address; + switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) { + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: + return buffer.readUnsignedByte(); + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: + return buffer.readUnsignedShort(); + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: + return buffer.readUnsignedInt24(); + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS: + default: + return FormatSpec.NO_CHILDREN_ADDRESS; + } + } + + static int readParentAddress(final FusionDictionaryBufferInterface buffer, + final FormatOptions formatOptions) { + if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { + final int parentAddress = buffer.readUnsignedInt24(); + final int sign = ((parentAddress & FormatSpec.MSB24) != 0) ? -1 : 1; + return sign * (parentAddress & FormatSpec.SINT24_MAX); + } else { + return FormatSpec.NO_PARENT_ADDRESS; + } + } + + private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH]; + public static CharGroupInfo readCharGroup(final FusionDictionaryBufferInterface buffer, + final int originalGroupAddress, final FormatOptions options) { + int addressPointer = originalGroupAddress; + final int flags = buffer.readUnsignedByte(); + ++addressPointer; + + final int parentAddress = readParentAddress(buffer, options); + if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { + addressPointer += 3; + } + + final int characters[]; + if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { + int index = 0; + int character = CharEncoding.readChar(buffer); + addressPointer += CharEncoding.getCharSize(character); + while (-1 != character) { + // FusionDictionary is making sure that the length of the word is smaller than + // MAX_WORD_LENGTH. + // So we'll never write past the end of CHARACTER_BUFFER. + CHARACTER_BUFFER[index++] = character; + character = CharEncoding.readChar(buffer); + addressPointer += CharEncoding.getCharSize(character); + } + characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index); + } else { + final int character = CharEncoding.readChar(buffer); + addressPointer += CharEncoding.getCharSize(character); + characters = new int[] { character }; + } + final int frequency; + if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { + ++addressPointer; + frequency = buffer.readUnsignedByte(); + } else { + frequency = CharGroup.NOT_A_TERMINAL; + } + int childrenAddress = readChildrenAddress(buffer, flags, options); + if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { + childrenAddress += addressPointer; + } + addressPointer += getChildrenAddressSize(flags, options); + ArrayList shortcutTargets = null; + if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) { + final int pointerBefore = buffer.position(); + shortcutTargets = new ArrayList(); + buffer.readUnsignedShort(); // Skip the size + while (true) { + final int targetFlags = buffer.readUnsignedByte(); + final String word = CharEncoding.readString(buffer); + shortcutTargets.add(new WeightedString(word, + targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY)); + if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; + } + addressPointer += buffer.position() - pointerBefore; + } + ArrayList bigrams = null; + if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { + bigrams = new ArrayList(); + int bigramCount = 0; + while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { + final int bigramFlags = buffer.readUnsignedByte(); + ++addressPointer; + final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE) + ? 1 : -1; + int bigramAddress = addressPointer; + switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) { + case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: + bigramAddress += sign * buffer.readUnsignedByte(); + addressPointer += 1; + break; + case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: + bigramAddress += sign * buffer.readUnsignedShort(); + addressPointer += 2; + break; + case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: + final int offset = (buffer.readUnsignedByte() << 16) + + buffer.readUnsignedShort(); + bigramAddress += sign * offset; + addressPointer += 3; + break; + default: + throw new RuntimeException("Has bigrams with no address"); + } + bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY, + bigramAddress)); + if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; + } + if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { + MakedictLog.d("too many bigrams in a group."); + } + } + return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency, + parentAddress, childrenAddress, shortcutTargets, bigrams); + } + + /** + * Reads and returns the char group count out of a buffer and forwards the pointer. + */ + public static int readCharGroupCount(final FusionDictionaryBufferInterface buffer) { + final int msb = buffer.readUnsignedByte(); + if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= msb) { + return msb; + } else { + return ((FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & msb) << 8) + + buffer.readUnsignedByte(); + } + } + + // The word cache here is a stopgap bandaid to help the catastrophic performance + // of this method. Since it performs direct, unbuffered random access to the file and + // may be called hundreds of thousands of times, the resulting performance is not + // reasonable without some kind of cache. Thus: + private static TreeMap wordCache = + new TreeMap(); + /** + * Finds, as a string, the word at the address passed as an argument. + * + * @param buffer the buffer to read from. + * @param headerSize the size of the header. + * @param address the address to seek. + * @param formatOptions file format options. + * @return the word with its frequency, as a weighted string. + */ + /* package for tests */ static WeightedString getWordAtAddress( + final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, + final FormatOptions formatOptions) { + final WeightedString cachedString = wordCache.get(address); + if (null != cachedString) return cachedString; + + final WeightedString result; + final int originalPointer = buffer.position(); + buffer.position(address); + + if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { + result = getWordAtAddressWithParentAddress(buffer, headerSize, address, formatOptions); + } else { + result = getWordAtAddressWithoutParentAddress(buffer, headerSize, address, + formatOptions); + } + + wordCache.put(address, result); + buffer.position(originalPointer); + return result; + } + + // TODO: static!? This will behave erratically when used in multi-threaded code. + // We need to fix this + private static int[] sGetWordBuffer = new int[FormatSpec.MAX_WORD_LENGTH]; + @SuppressWarnings("unused") + private static WeightedString getWordAtAddressWithParentAddress( + final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, + final FormatOptions options) { + int currentAddress = address; + int index = FormatSpec.MAX_WORD_LENGTH - 1; + int frequency = Integer.MIN_VALUE; + // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH + for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) { + CharGroupInfo currentInfo; + int loopCounter = 0; + do { + buffer.position(currentAddress + headerSize); + currentInfo = readCharGroup(buffer, currentAddress, options); + if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) { + currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; + } + if (DBG && loopCounter++ > MAX_JUMPS) { + MakedictLog.d("Too many jumps - probably a bug"); + } + } while (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)); + if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency; + for (int i = 0; i < currentInfo.mCharacters.length; ++i) { + sGetWordBuffer[index--] = + currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1]; + } + if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break; + currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; + } + + return new WeightedString( + new String(sGetWordBuffer, index + 1, FormatSpec.MAX_WORD_LENGTH - index - 1), + frequency); + } + + private static WeightedString getWordAtAddressWithoutParentAddress( + final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, + final FormatOptions options) { + buffer.position(headerSize); + final int count = readCharGroupCount(buffer); + int groupOffset = BinaryDictIOUtils.getGroupCountSize(count); + final StringBuilder builder = new StringBuilder(); + WeightedString result = null; + + CharGroupInfo last = null; + for (int i = count - 1; i >= 0; --i) { + CharGroupInfo info = readCharGroup(buffer, groupOffset, options); + groupOffset = info.mEndAddress; + if (info.mOriginalAddress == address) { + builder.append(new String(info.mCharacters, 0, info.mCharacters.length)); + result = new WeightedString(builder.toString(), info.mFrequency); + break; // and return + } + if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { + if (info.mChildrenAddress > address) { + if (null == last) continue; + builder.append(new String(last.mCharacters, 0, last.mCharacters.length)); + buffer.position(last.mChildrenAddress + headerSize); + i = readCharGroupCount(buffer); + groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i); + last = null; + continue; + } + last = info; + } + if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) { + builder.append(new String(last.mCharacters, 0, last.mCharacters.length)); + buffer.position(last.mChildrenAddress + headerSize); + i = readCharGroupCount(buffer); + groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i); + last = null; + continue; + } + } + return result; + } + + /** + * Reads a single node from a buffer. + * + * This methods reads the file at the current position. A node is fully expected to start at + * the current position. + * This will recursively read other nodes into the structure, populating the reverse + * maps on the fly and using them to keep track of already read nodes. + * + * @param buffer the buffer, correctly positioned at the start of a node. + * @param headerSize the size, in bytes, of the file header. + * @param reverseNodeMap a mapping from addresses to already read nodes. + * @param reverseGroupMap a mapping from addresses to already read character groups. + * @param options file format options. + * @return the read node with all his children already read. + */ + private static Node readNode(final FusionDictionaryBufferInterface buffer, final int headerSize, + final Map reverseNodeMap, final Map reverseGroupMap, + final FormatOptions options) + throws IOException { + final ArrayList nodeContents = new ArrayList(); + final int nodeOrigin = buffer.position() - headerSize; + + do { // Scan the linked-list node. + final int nodeHeadPosition = buffer.position() - headerSize; + final int count = readCharGroupCount(buffer); + int groupOffset = nodeHeadPosition + BinaryDictIOUtils.getGroupCountSize(count); + for (int i = count; i > 0; --i) { // Scan the array of CharGroup. + CharGroupInfo info = readCharGroup(buffer, groupOffset, options); + if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue; + ArrayList shortcutTargets = info.mShortcutTargets; + ArrayList bigrams = null; + if (null != info.mBigrams) { + bigrams = new ArrayList(); + for (PendingAttribute bigram : info.mBigrams) { + final WeightedString word = getWordAtAddress( + buffer, headerSize, bigram.mAddress, options); + final int reconstructedFrequency = + reconstructBigramFrequency(word.mFrequency, bigram.mFrequency); + bigrams.add(new WeightedString(word.mWord, reconstructedFrequency)); + } + } + if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { + Node children = reverseNodeMap.get(info.mChildrenAddress); + if (null == children) { + final int currentPosition = buffer.position(); + buffer.position(info.mChildrenAddress + headerSize); + children = readNode( + buffer, headerSize, reverseNodeMap, reverseGroupMap, options); + buffer.position(currentPosition); + } + nodeContents.add( + new CharGroup(info.mCharacters, shortcutTargets, bigrams, + info.mFrequency, + 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), + 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children)); + } else { + nodeContents.add( + new CharGroup(info.mCharacters, shortcutTargets, bigrams, + info.mFrequency, + 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), + 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED))); + } + groupOffset = info.mEndAddress; + } + + // reach the end of the array. + if (options.mSupportsDynamicUpdate) { + final int nextAddress = buffer.readUnsignedInt24(); + if (nextAddress >= 0 && nextAddress < buffer.limit()) { + buffer.position(nextAddress); + } else { + break; + } + } + } while (options.mSupportsDynamicUpdate && + buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS); + + final Node node = new Node(nodeContents); + node.mCachedAddressBeforeUpdate = nodeOrigin; + node.mCachedAddressAfterUpdate = nodeOrigin; + reverseNodeMap.put(node.mCachedAddressAfterUpdate, node); + return node; + } + + /** + * Helper function to get the binary format version from the header. + * @throws IOException + */ + private static int getFormatVersion(final FusionDictionaryBufferInterface buffer) + throws IOException { + final int magic = buffer.readInt(); + if (FormatSpec.MAGIC_NUMBER == magic) return buffer.readUnsignedShort(); + return FormatSpec.NOT_A_VERSION_NUMBER; + } + + /** + * Helper function to get and validate the binary format version. + * @throws UnsupportedFormatException + * @throws IOException + */ + private static int checkFormatVersion(final FusionDictionaryBufferInterface buffer) + throws IOException, UnsupportedFormatException { + final int version = getFormatVersion(buffer); + if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION + || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) { + throw new UnsupportedFormatException("This file has version " + version + + ", but this implementation does not support versions above " + + FormatSpec.MAXIMUM_SUPPORTED_VERSION); + } + return version; + } + + /** + * Reads a header from a buffer. + * @param buffer the buffer to read. + * @throws IOException + * @throws UnsupportedFormatException + */ + public static FileHeader readHeader(final FusionDictionaryBufferInterface buffer) + throws IOException, UnsupportedFormatException { + final int version = checkFormatVersion(buffer); + final int optionsFlags = buffer.readUnsignedShort(); + + final HashMap attributes = new HashMap(); + final int headerSize; + headerSize = buffer.readInt(); + + if (headerSize < 0) { + throw new UnsupportedFormatException("header size can't be negative."); + } + + populateOptions(buffer, headerSize, attributes); + buffer.position(headerSize); + + final FileHeader header = new FileHeader(headerSize, + new FusionDictionary.DictionaryOptions(attributes, + 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), + 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), + new FormatOptions(version, + 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); + return header; + } + + /** + * Reads options from a buffer and populate a map with their contents. + * + * The buffer is read at the current position, so the caller must take care the pointer + * is in the right place before calling this. + */ + public static void populateOptions(final FusionDictionaryBufferInterface buffer, + final int headerSize, final HashMap options) { + while (buffer.position() < headerSize) { + final String key = CharEncoding.readString(buffer); + final String value = CharEncoding.readString(buffer); + options.put(key, value); + } + } + + /** + * Reads a buffer and returns the memory representation of the dictionary. + * + * This high-level method takes a buffer and reads its contents, populating a + * FusionDictionary structure. The optional dict argument is an existing dictionary to + * which words from the buffer should be added. If it is null, a new dictionary is created. + * + * @param reader the reader. + * @param dict an optional dictionary to add words to, or null. + * @return the created (or merged) dictionary. + */ + @UsedForTesting + public static FusionDictionary readDictionaryBinary(final BinaryDictReader reader, + final FusionDictionary dict) throws FileNotFoundException, IOException, + UnsupportedFormatException { + // clear cache + wordCache.clear(); + + // if the buffer has not been opened, open the buffer with bytebuffer. + if (reader.getBuffer() == null) reader.openBuffer( + new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); + if (reader.getBuffer() == null) { + MakedictLog.e("Cannot open the buffer"); + } + + // Read header + final FileHeader header = readHeader(reader.getBuffer()); + + Map reverseNodeMapping = new TreeMap(); + Map reverseGroupMapping = new TreeMap(); + final Node root = readNode(reader.getBuffer(), header.mHeaderSize, reverseNodeMapping, + reverseGroupMapping, header.mFormatOptions); + + FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions); + if (null != dict) { + for (final Word w : dict) { + if (w.mIsBlacklistEntry) { + newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord); + } else { + newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord); + } + } + for (final Word w : dict) { + // By construction a binary dictionary may not have bigrams pointing to + // words that are not also registered as unigrams so we don't have to avoid + // them explicitly here. + for (final WeightedString bigram : w.mBigrams) { + newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency); + } + } + } + + return newDict; + } + + /** + * Helper method to pass a file name instead of a File object to isBinaryDictionary. + */ + public static boolean isBinaryDictionary(final String filename) { + final File file = new File(filename); + return isBinaryDictionary(file); + } + + /** + * Basic test to find out whether the file is a binary dictionary or not. + * + * Concretely this only tests the magic number. + * + * @param file The file to test. + * @return true if it's a binary dictionary, false otherwise + */ + public static boolean isBinaryDictionary(final File file) { + FileInputStream inStream = null; + try { + inStream = new FileInputStream(file); + final ByteBuffer buffer = inStream.getChannel().map( + FileChannel.MapMode.READ_ONLY, 0, file.length()); + final int version = getFormatVersion(new ByteBufferWrapper(buffer)); + return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION + && version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION); + } catch (FileNotFoundException e) { + return false; + } catch (IOException e) { + return false; + } finally { + if (inStream != null) { + try { + inStream.close(); + } catch (IOException e) { + // do nothing + } + } + } + } + + /** + * Calculate bigram frequency from compressed value + * + * @see #makeBigramFlags + * + * @param unigramFrequency + * @param bigramFrequency compressed frequency + * @return approximate bigram frequency + */ + public static int reconstructBigramFrequency(final int unigramFrequency, + final int bigramFrequency) { + final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency) + / (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY); + final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f); + return (int)resultFreqFloat; + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputUtils.java deleted file mode 100644 index c7e344f2f..000000000 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictInputUtils.java +++ /dev/null @@ -1,811 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; -import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; -import com.android.inputmethod.latin.makedict.FusionDictionary.Node; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; - -import java.io.ByteArrayOutputStream; -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.HashMap; -import java.util.Map; -import java.util.TreeMap; - -/** - * Reads binary files for a FusionDictionary. - * - * All the methods in this class are static. - */ -public final class BinaryDictInputUtils { - - private BinaryDictInputUtils() { - // This utility class is not publicly instantiable. - } - - private static final boolean DBG = MakedictLog.DBG; - - private static final int MAX_JUMPS = 12; - - @UsedForTesting - public interface FusionDictionaryBufferInterface { - public int readUnsignedByte(); - public int readUnsignedShort(); - public int readUnsignedInt24(); - public int readInt(); - public int position(); - public void position(int newPosition); - public void put(final byte b); - public int limit(); - @UsedForTesting - public int capacity(); - } - - public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface { - private ByteBuffer mBuffer; - - public ByteBufferWrapper(final ByteBuffer buffer) { - mBuffer = buffer; - } - - @Override - public int readUnsignedByte() { - return mBuffer.get() & 0xFF; - } - - @Override - public int readUnsignedShort() { - return mBuffer.getShort() & 0xFFFF; - } - - @Override - public int readUnsignedInt24() { - final int retval = readUnsignedByte(); - return (retval << 16) + readUnsignedShort(); - } - - @Override - public int readInt() { - return mBuffer.getInt(); - } - - @Override - public int position() { - return mBuffer.position(); - } - - @Override - public void position(int newPos) { - mBuffer.position(newPos); - } - - @Override - public void put(final byte b) { - mBuffer.put(b); - } - - @Override - public int limit() { - return mBuffer.limit(); - } - - @Override - public int capacity() { - return mBuffer.capacity(); - } - } - - /** - * A class grouping utility function for our specific character encoding. - */ - static final class CharEncoding { - private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; - private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF; - - /** - * Helper method to find out whether this code fits on one byte - */ - private static boolean fitsOnOneByte(final int character) { - return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE - && character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE; - } - - /** - * Compute the size of a character given its character code. - * - * Char format is: - * 1 byte = bbbbbbbb match - * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte - * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because - * unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with - * 00011111 would be outside unicode. - * else: iso-latin-1 code - * This allows for the whole unicode range to be encoded, including chars outside of - * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control - * characters which should never happen anyway (and still work, but take 3 bytes). - * - * @param character the character code. - * @return the size in binary encoded-form, either 1 or 3 bytes. - */ - static int getCharSize(final int character) { - // See char encoding in FusionDictionary.java - if (fitsOnOneByte(character)) return 1; - if (FormatSpec.INVALID_CHARACTER == character) return 1; - return 3; - } - - /** - * Compute the byte size of a character array. - */ - static int getCharArraySize(final int[] chars) { - int size = 0; - for (int character : chars) size += getCharSize(character); - return size; - } - - /** - * Writes a char array to a byte buffer. - * - * @param codePoints the code point array to write. - * @param buffer the byte buffer to write to. - * @param index the index in buffer to write the character array to. - * @return the index after the last character. - */ - static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) { - for (int codePoint : codePoints) { - if (1 == getCharSize(codePoint)) { - buffer[index++] = (byte)codePoint; - } else { - buffer[index++] = (byte)(0xFF & (codePoint >> 16)); - buffer[index++] = (byte)(0xFF & (codePoint >> 8)); - buffer[index++] = (byte)(0xFF & codePoint); - } - } - return index; - } - - /** - * Writes a string with our character format to a byte buffer. - * - * This will also write the terminator byte. - * - * @param buffer the byte buffer to write to. - * @param origin the offset to write from. - * @param word the string to write. - * @return the size written, in bytes. - */ - static int writeString(final byte[] buffer, final int origin, - final String word) { - final int length = word.length(); - int index = origin; - for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { - final int codePoint = word.codePointAt(i); - if (1 == getCharSize(codePoint)) { - buffer[index++] = (byte)codePoint; - } else { - buffer[index++] = (byte)(0xFF & (codePoint >> 16)); - buffer[index++] = (byte)(0xFF & (codePoint >> 8)); - buffer[index++] = (byte)(0xFF & codePoint); - } - } - buffer[index++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR; - return index - origin; - } - - /** - * Writes a string with our character format to a ByteArrayOutputStream. - * - * This will also write the terminator byte. - * - * @param buffer the ByteArrayOutputStream to write to. - * @param word the string to write. - */ - static void writeString(final ByteArrayOutputStream buffer, final String word) { - final int length = word.length(); - for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { - final int codePoint = word.codePointAt(i); - if (1 == getCharSize(codePoint)) { - buffer.write((byte) codePoint); - } else { - buffer.write((byte) (0xFF & (codePoint >> 16))); - buffer.write((byte) (0xFF & (codePoint >> 8))); - buffer.write((byte) (0xFF & codePoint)); - } - } - buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR); - } - - /** - * Reads a string from a buffer. This is the converse of the above method. - */ - private static String readString(final FusionDictionaryBufferInterface buffer) { - final StringBuilder s = new StringBuilder(); - int character = readChar(buffer); - while (character != FormatSpec.INVALID_CHARACTER) { - s.appendCodePoint(character); - character = readChar(buffer); - } - return s.toString(); - } - - /** - * Reads a character from the buffer. - * - * This follows the character format documented earlier in this source file. - * - * @param buffer the buffer, positioned over an encoded character. - * @return the character code. - */ - static int readChar(final FusionDictionaryBufferInterface buffer) { - int character = buffer.readUnsignedByte(); - if (!fitsOnOneByte(character)) { - if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == character) { - return FormatSpec.INVALID_CHARACTER; - } - character <<= 16; - character += buffer.readUnsignedShort(); - } - return character; - } - } - - // Input methods: Read a binary dictionary to memory. - // readDictionaryBinary is the public entry point for them. - - static int readChildrenAddress(final FusionDictionaryBufferInterface buffer, - final int optionFlags, final FormatOptions options) { - if (options.mSupportsDynamicUpdate) { - final int address = buffer.readUnsignedInt24(); - if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; - if ((address & FormatSpec.MSB24) != 0) { - return -(address & FormatSpec.SINT24_MAX); - } else { - return address; - } - } - int address; - switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) { - case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: - return buffer.readUnsignedByte(); - case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: - return buffer.readUnsignedShort(); - case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: - return buffer.readUnsignedInt24(); - case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS: - default: - return FormatSpec.NO_CHILDREN_ADDRESS; - } - } - - static int readParentAddress(final FusionDictionaryBufferInterface buffer, - final FormatOptions formatOptions) { - if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { - final int parentAddress = buffer.readUnsignedInt24(); - final int sign = ((parentAddress & FormatSpec.MSB24) != 0) ? -1 : 1; - return sign * (parentAddress & FormatSpec.SINT24_MAX); - } else { - return FormatSpec.NO_PARENT_ADDRESS; - } - } - - private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH]; - public static CharGroupInfo readCharGroup(final FusionDictionaryBufferInterface buffer, - final int originalGroupAddress, final FormatOptions options) { - int addressPointer = originalGroupAddress; - final int flags = buffer.readUnsignedByte(); - ++addressPointer; - - final int parentAddress = readParentAddress(buffer, options); - if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { - addressPointer += 3; - } - - final int characters[]; - if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { - int index = 0; - int character = CharEncoding.readChar(buffer); - addressPointer += CharEncoding.getCharSize(character); - while (-1 != character) { - // FusionDictionary is making sure that the length of the word is smaller than - // MAX_WORD_LENGTH. - // So we'll never write past the end of CHARACTER_BUFFER. - CHARACTER_BUFFER[index++] = character; - character = CharEncoding.readChar(buffer); - addressPointer += CharEncoding.getCharSize(character); - } - characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index); - } else { - final int character = CharEncoding.readChar(buffer); - addressPointer += CharEncoding.getCharSize(character); - characters = new int[] { character }; - } - final int frequency; - if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { - ++addressPointer; - frequency = buffer.readUnsignedByte(); - } else { - frequency = CharGroup.NOT_A_TERMINAL; - } - int childrenAddress = readChildrenAddress(buffer, flags, options); - if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { - childrenAddress += addressPointer; - } - addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); - ArrayList shortcutTargets = null; - if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) { - final int pointerBefore = buffer.position(); - shortcutTargets = new ArrayList(); - buffer.readUnsignedShort(); // Skip the size - while (true) { - final int targetFlags = buffer.readUnsignedByte(); - final String word = CharEncoding.readString(buffer); - shortcutTargets.add(new WeightedString(word, - targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY)); - if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; - } - addressPointer += buffer.position() - pointerBefore; - } - ArrayList bigrams = null; - if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { - bigrams = new ArrayList(); - int bigramCount = 0; - while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { - final int bigramFlags = buffer.readUnsignedByte(); - ++addressPointer; - final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE) - ? 1 : -1; - int bigramAddress = addressPointer; - switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) { - case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: - bigramAddress += sign * buffer.readUnsignedByte(); - addressPointer += 1; - break; - case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: - bigramAddress += sign * buffer.readUnsignedShort(); - addressPointer += 2; - break; - case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: - final int offset = (buffer.readUnsignedByte() << 16) - + buffer.readUnsignedShort(); - bigramAddress += sign * offset; - addressPointer += 3; - break; - default: - throw new RuntimeException("Has bigrams with no address"); - } - bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY, - bigramAddress)); - if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; - } - if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { - MakedictLog.d("too many bigrams in a group."); - } - } - return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency, - parentAddress, childrenAddress, shortcutTargets, bigrams); - } - - /** - * Reads and returns the char group count out of a buffer and forwards the pointer. - */ - public static int readCharGroupCount(final FusionDictionaryBufferInterface buffer) { - final int msb = buffer.readUnsignedByte(); - if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= msb) { - return msb; - } else { - return ((FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & msb) << 8) - + buffer.readUnsignedByte(); - } - } - - // The word cache here is a stopgap bandaid to help the catastrophic performance - // of this method. Since it performs direct, unbuffered random access to the file and - // may be called hundreds of thousands of times, the resulting performance is not - // reasonable without some kind of cache. Thus: - private static TreeMap wordCache = - new TreeMap(); - /** - * Finds, as a string, the word at the address passed as an argument. - * - * @param buffer the buffer to read from. - * @param headerSize the size of the header. - * @param address the address to seek. - * @param formatOptions file format options. - * @return the word with its frequency, as a weighted string. - */ - /* package for tests */ static WeightedString getWordAtAddress( - final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, - final FormatOptions formatOptions) { - final WeightedString cachedString = wordCache.get(address); - if (null != cachedString) return cachedString; - - final WeightedString result; - final int originalPointer = buffer.position(); - buffer.position(address); - - if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { - result = getWordAtAddressWithParentAddress(buffer, headerSize, address, formatOptions); - } else { - result = getWordAtAddressWithoutParentAddress(buffer, headerSize, address, - formatOptions); - } - - wordCache.put(address, result); - buffer.position(originalPointer); - return result; - } - - // TODO: static!? This will behave erratically when used in multi-threaded code. - // We need to fix this - private static int[] sGetWordBuffer = new int[FormatSpec.MAX_WORD_LENGTH]; - @SuppressWarnings("unused") - private static WeightedString getWordAtAddressWithParentAddress( - final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, - final FormatOptions options) { - int currentAddress = address; - int index = FormatSpec.MAX_WORD_LENGTH - 1; - int frequency = Integer.MIN_VALUE; - // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH - for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) { - CharGroupInfo currentInfo; - int loopCounter = 0; - do { - buffer.position(currentAddress + headerSize); - currentInfo = readCharGroup(buffer, currentAddress, options); - if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) { - currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; - } - if (DBG && loopCounter++ > MAX_JUMPS) { - MakedictLog.d("Too many jumps - probably a bug"); - } - } while (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)); - if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency; - for (int i = 0; i < currentInfo.mCharacters.length; ++i) { - sGetWordBuffer[index--] = - currentInfo.mCharacters[currentInfo.mCharacters.length - i - 1]; - } - if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break; - currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; - } - - return new WeightedString( - new String(sGetWordBuffer, index + 1, FormatSpec.MAX_WORD_LENGTH - index - 1), - frequency); - } - - private static WeightedString getWordAtAddressWithoutParentAddress( - final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, - final FormatOptions options) { - buffer.position(headerSize); - final int count = readCharGroupCount(buffer); - int groupOffset = BinaryDictIOUtils.getGroupCountSize(count); - final StringBuilder builder = new StringBuilder(); - WeightedString result = null; - - CharGroupInfo last = null; - for (int i = count - 1; i >= 0; --i) { - CharGroupInfo info = readCharGroup(buffer, groupOffset, options); - groupOffset = info.mEndAddress; - if (info.mOriginalAddress == address) { - builder.append(new String(info.mCharacters, 0, info.mCharacters.length)); - result = new WeightedString(builder.toString(), info.mFrequency); - break; // and return - } - if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { - if (info.mChildrenAddress > address) { - if (null == last) continue; - builder.append(new String(last.mCharacters, 0, last.mCharacters.length)); - buffer.position(last.mChildrenAddress + headerSize); - i = readCharGroupCount(buffer); - groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i); - last = null; - continue; - } - last = info; - } - if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) { - builder.append(new String(last.mCharacters, 0, last.mCharacters.length)); - buffer.position(last.mChildrenAddress + headerSize); - i = readCharGroupCount(buffer); - groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i); - last = null; - continue; - } - } - return result; - } - - /** - * Reads a single node from a buffer. - * - * This methods reads the file at the current position. A node is fully expected to start at - * the current position. - * This will recursively read other nodes into the structure, populating the reverse - * maps on the fly and using them to keep track of already read nodes. - * - * @param buffer the buffer, correctly positioned at the start of a node. - * @param headerSize the size, in bytes, of the file header. - * @param reverseNodeMap a mapping from addresses to already read nodes. - * @param reverseGroupMap a mapping from addresses to already read character groups. - * @param options file format options. - * @return the read node with all his children already read. - */ - private static Node readNode(final FusionDictionaryBufferInterface buffer, final int headerSize, - final Map reverseNodeMap, final Map reverseGroupMap, - final FormatOptions options) - throws IOException { - final ArrayList nodeContents = new ArrayList(); - final int nodeOrigin = buffer.position() - headerSize; - - do { // Scan the linked-list node. - final int nodeHeadPosition = buffer.position() - headerSize; - final int count = readCharGroupCount(buffer); - int groupOffset = nodeHeadPosition + BinaryDictIOUtils.getGroupCountSize(count); - for (int i = count; i > 0; --i) { // Scan the array of CharGroup. - CharGroupInfo info = readCharGroup(buffer, groupOffset, options); - if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue; - ArrayList shortcutTargets = info.mShortcutTargets; - ArrayList bigrams = null; - if (null != info.mBigrams) { - bigrams = new ArrayList(); - for (PendingAttribute bigram : info.mBigrams) { - final WeightedString word = getWordAtAddress( - buffer, headerSize, bigram.mAddress, options); - final int reconstructedFrequency = - reconstructBigramFrequency(word.mFrequency, bigram.mFrequency); - bigrams.add(new WeightedString(word.mWord, reconstructedFrequency)); - } - } - if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { - Node children = reverseNodeMap.get(info.mChildrenAddress); - if (null == children) { - final int currentPosition = buffer.position(); - buffer.position(info.mChildrenAddress + headerSize); - children = readNode( - buffer, headerSize, reverseNodeMap, reverseGroupMap, options); - buffer.position(currentPosition); - } - nodeContents.add( - new CharGroup(info.mCharacters, shortcutTargets, bigrams, - info.mFrequency, - 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), - 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children)); - } else { - nodeContents.add( - new CharGroup(info.mCharacters, shortcutTargets, bigrams, - info.mFrequency, - 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), - 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED))); - } - groupOffset = info.mEndAddress; - } - - // reach the end of the array. - if (options.mSupportsDynamicUpdate) { - final int nextAddress = buffer.readUnsignedInt24(); - if (nextAddress >= 0 && nextAddress < buffer.limit()) { - buffer.position(nextAddress); - } else { - break; - } - } - } while (options.mSupportsDynamicUpdate && - buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS); - - final Node node = new Node(nodeContents); - node.mCachedAddressBeforeUpdate = nodeOrigin; - node.mCachedAddressAfterUpdate = nodeOrigin; - reverseNodeMap.put(node.mCachedAddressAfterUpdate, node); - return node; - } - - /** - * Helper function to get the binary format version from the header. - * @throws IOException - */ - private static int getFormatVersion(final FusionDictionaryBufferInterface buffer) - throws IOException { - final int magic = buffer.readInt(); - if (FormatSpec.MAGIC_NUMBER == magic) return buffer.readUnsignedShort(); - return FormatSpec.NOT_A_VERSION_NUMBER; - } - - /** - * Helper function to get and validate the binary format version. - * @throws UnsupportedFormatException - * @throws IOException - */ - private static int checkFormatVersion(final FusionDictionaryBufferInterface buffer) - throws IOException, UnsupportedFormatException { - final int version = getFormatVersion(buffer); - if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION - || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) { - throw new UnsupportedFormatException("This file has version " + version - + ", but this implementation does not support versions above " - + FormatSpec.MAXIMUM_SUPPORTED_VERSION); - } - return version; - } - - /** - * Reads a header from a buffer. - * @param buffer the buffer to read. - * @throws IOException - * @throws UnsupportedFormatException - */ - public static FileHeader readHeader(final FusionDictionaryBufferInterface buffer) - throws IOException, UnsupportedFormatException { - final int version = checkFormatVersion(buffer); - final int optionsFlags = buffer.readUnsignedShort(); - - final HashMap attributes = new HashMap(); - final int headerSize; - headerSize = buffer.readInt(); - - if (headerSize < 0) { - throw new UnsupportedFormatException("header size can't be negative."); - } - - populateOptions(buffer, headerSize, attributes); - buffer.position(headerSize); - - final FileHeader header = new FileHeader(headerSize, - new FusionDictionary.DictionaryOptions(attributes, - 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), - 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), - new FormatOptions(version, - 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); - return header; - } - - /** - * Reads options from a buffer and populate a map with their contents. - * - * The buffer is read at the current position, so the caller must take care the pointer - * is in the right place before calling this. - */ - public static void populateOptions(final FusionDictionaryBufferInterface buffer, - final int headerSize, final HashMap options) { - while (buffer.position() < headerSize) { - final String key = CharEncoding.readString(buffer); - final String value = CharEncoding.readString(buffer); - options.put(key, value); - } - } - - /** - * Reads a buffer and returns the memory representation of the dictionary. - * - * This high-level method takes a buffer and reads its contents, populating a - * FusionDictionary structure. The optional dict argument is an existing dictionary to - * which words from the buffer should be added. If it is null, a new dictionary is created. - * - * @param reader the reader. - * @param dict an optional dictionary to add words to, or null. - * @return the created (or merged) dictionary. - */ - @UsedForTesting - public static FusionDictionary readDictionaryBinary(final BinaryDictReader reader, - final FusionDictionary dict) throws FileNotFoundException, IOException, - UnsupportedFormatException { - // clear cache - wordCache.clear(); - - // if the buffer has not been opened, open the buffer with bytebuffer. - if (reader.getBuffer() == null) reader.openBuffer( - new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); - if (reader.getBuffer() == null) { - MakedictLog.e("Cannot open the buffer"); - } - - // Read header - final FileHeader header = readHeader(reader.getBuffer()); - - Map reverseNodeMapping = new TreeMap(); - Map reverseGroupMapping = new TreeMap(); - final Node root = readNode(reader.getBuffer(), header.mHeaderSize, reverseNodeMapping, - reverseGroupMapping, header.mFormatOptions); - - FusionDictionary newDict = new FusionDictionary(root, header.mDictionaryOptions); - if (null != dict) { - for (final Word w : dict) { - if (w.mIsBlacklistEntry) { - newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord); - } else { - newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord); - } - } - for (final Word w : dict) { - // By construction a binary dictionary may not have bigrams pointing to - // words that are not also registered as unigrams so we don't have to avoid - // them explicitly here. - for (final WeightedString bigram : w.mBigrams) { - newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency); - } - } - } - - return newDict; - } - - /** - * Helper method to pass a file name instead of a File object to isBinaryDictionary. - */ - public static boolean isBinaryDictionary(final String filename) { - final File file = new File(filename); - return isBinaryDictionary(file); - } - - /** - * Basic test to find out whether the file is a binary dictionary or not. - * - * Concretely this only tests the magic number. - * - * @param file The file to test. - * @return true if it's a binary dictionary, false otherwise - */ - public static boolean isBinaryDictionary(final File file) { - FileInputStream inStream = null; - try { - inStream = new FileInputStream(file); - final ByteBuffer buffer = inStream.getChannel().map( - FileChannel.MapMode.READ_ONLY, 0, file.length()); - final int version = getFormatVersion(new ByteBufferWrapper(buffer)); - return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION - && version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION); - } catch (FileNotFoundException e) { - return false; - } catch (IOException e) { - return false; - } finally { - if (inStream != null) { - try { - inStream.close(); - } catch (IOException e) { - // do nothing - } - } - } - } - - /** - * Calculate bigram frequency from compressed value - * - * @see #BinaryDictOutput.makeBigramFlags - * - * @param unigramFrequency - * @param bigramFrequency compressed frequency - * @return approximate bigram frequency - */ - public static int reconstructBigramFrequency(final int unigramFrequency, - final int bigramFrequency) { - final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency) - / (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY); - final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f); - return (int)resultFreqFloat; - } -} diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java index e63231f2a..a4a7ce458 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java @@ -17,7 +17,7 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.utils.ByteArrayWrapper; import java.io.File; @@ -55,7 +55,7 @@ public class BinaryDictReader { } } if (buffer != null) { - return new BinaryDictInputUtils.ByteBufferWrapper(buffer); + return new BinaryDictInputOutput.ByteBufferWrapper(buffer); } return null; } @@ -103,7 +103,7 @@ public class BinaryDictReader { } } if (buffer != null) { - return new BinaryDictInputUtils.ByteBufferWrapper(buffer); + return new BinaryDictInputOutput.ByteBufferWrapper(buffer); } return null; } diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java index 01669bcfc..5b10912ea 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java @@ -18,7 +18,7 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; @@ -58,7 +58,7 @@ public final class DynamicBinaryDictIOUtils { public static void deleteWord(final FusionDictionaryBufferInterface buffer, final String word) throws IOException, UnsupportedFormatException { buffer.position(0); - final FileHeader header = BinaryDictInputUtils.readHeader(buffer); + final FileHeader header = BinaryDictInputOutput.readHeader(buffer); final int wordPosition = BinaryDictIOUtils.getTerminalPosition(buffer, word); if (wordPosition == FormatSpec.NOT_VALID_WORD) return; @@ -114,7 +114,7 @@ public final class DynamicBinaryDictIOUtils { final int originalPosition = buffer.position(); buffer.position(nodeOriginAddress); do { - final int count = BinaryDictInputUtils.readCharGroupCount(buffer); + final int count = BinaryDictInputOutput.readCharGroupCount(buffer); for (int i = 0; i < count; ++i) { updateParentAddress(buffer, buffer.position(), newParentAddress, formatOptions); BinaryDictIOUtils.skipCharGroup(buffer, formatOptions); @@ -140,7 +140,7 @@ public final class DynamicBinaryDictIOUtils { final int originalPosition = buffer.position(); buffer.position(groupOriginAddress); final int flags = buffer.readUnsignedByte(); - final int parentAddress = BinaryDictInputUtils.readParentAddress(buffer, formatOptions); + final int parentAddress = BinaryDictInputOutput.readParentAddress(buffer, formatOptions); BinaryDictIOUtils.skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte(); final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS @@ -174,7 +174,7 @@ public final class DynamicBinaryDictIOUtils { buffer.position(nodeOriginAddress); int jumpCount = 0; while (jumpCount++ < MAX_JUMPS) { - final int count = BinaryDictInputUtils.readCharGroupCount(buffer); + final int count = BinaryDictInputOutput.readCharGroupCount(buffer); for (int i = 0; i < count; ++i) BinaryDictIOUtils.skipCharGroup(buffer, formatOptions); final int forwardLinkAddress = buffer.readUnsignedInt24(); if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) { @@ -269,7 +269,7 @@ public final class DynamicBinaryDictIOUtils { // find the insert position of the word. if (buffer.position() != 0) buffer.position(0); - final FileHeader header = BinaryDictInputUtils.readHeader(buffer); + final FileHeader header = BinaryDictInputOutput.readHeader(buffer); int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position(); final int[] codePoints = FusionDictionary.getCodePoints(word); @@ -279,12 +279,12 @@ public final class DynamicBinaryDictIOUtils { if (wordPos >= wordLen) break; nodeOriginAddress = buffer.position(); int nodeParentAddress = -1; - final int charGroupCount = BinaryDictInputUtils.readCharGroupCount(buffer); + final int charGroupCount = BinaryDictInputOutput.readCharGroupCount(buffer); boolean foundNextGroup = false; for (int i = 0; i < charGroupCount; ++i) { address = buffer.position(); - final CharGroupInfo currentInfo = BinaryDictInputUtils.readCharGroup(buffer, + final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer, buffer.position(), header.mFormatOptions); final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, header.mFormatOptions); @@ -305,7 +305,7 @@ public final class DynamicBinaryDictIOUtils { * abc - d - ef */ final int newNodeAddress = buffer.limit(); - final int flags = BinaryDictOutputUtils.makeCharGroupFlags(p > 1, + final int flags = BinaryDictInputOutput.makeCharGroupFlags(p > 1, isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */, false /* isBlackListEntry */, header.mFormatOptions); int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags, @@ -344,7 +344,7 @@ public final class DynamicBinaryDictIOUtils { final int childrenAddress = currentInfo.mChildrenAddress; // move prefix - final int prefixFlags = BinaryDictOutputUtils.makeCharGroupFlags(p > 1, + final int prefixFlags = BinaryDictInputOutput.makeCharGroupFlags(p > 1, false /* isTerminal */, 0 /* childrenAddressSize*/, false /* hasShortcut */, false /* hasBigrams */, false /* isNotAWord */, false /* isBlackListEntry */, @@ -360,7 +360,7 @@ public final class DynamicBinaryDictIOUtils { updateParentAddresses(buffer, currentInfo.mChildrenAddress, newNodeAddress + written + 1, header.mFormatOptions); } - final int suffixFlags = BinaryDictOutputUtils.makeCharGroupFlags( + final int suffixFlags = BinaryDictInputOutput.makeCharGroupFlags( suffixCharacters.length > 1, (currentInfo.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0, 0 /* childrenAddressSize */, @@ -378,7 +378,7 @@ public final class DynamicBinaryDictIOUtils { final int[] newCharacters = Arrays.copyOfRange(codePoints, wordPos + p, codePoints.length); - final int flags = BinaryDictOutputUtils.makeCharGroupFlags( + final int flags = BinaryDictInputOutput.makeCharGroupFlags( newCharacters.length > 1, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, header.mFormatOptions); @@ -401,7 +401,7 @@ public final class DynamicBinaryDictIOUtils { // only update group. final int newNodeAddress = buffer.limit(); final boolean hasMultipleChars = currentInfo.mCharacters.length > 1; - final int flags = BinaryDictOutputUtils.makeCharGroupFlags(hasMultipleChars, + final int flags = BinaryDictInputOutput.makeCharGroupFlags(hasMultipleChars, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, header.mFormatOptions); final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, @@ -431,7 +431,7 @@ public final class DynamicBinaryDictIOUtils { header.mFormatOptions); final int newGroupAddress = newNodeAddress + 1; final boolean hasMultipleChars = (wordLen - wordPos) > 1; - final int flags = BinaryDictOutputUtils.makeCharGroupFlags(hasMultipleChars, + final int flags = BinaryDictInputOutput.makeCharGroupFlags(hasMultipleChars, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, header.mFormatOptions); final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); @@ -476,7 +476,7 @@ public final class DynamicBinaryDictIOUtils { BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress); final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); - final int flags = BinaryDictOutputUtils.makeCharGroupFlags(characters.length > 1, + final int flags = BinaryDictInputOutput.makeCharGroupFlags(characters.length > 1, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isNotAWord, isBlackListEntry, header.mFormatOptions); final CharGroupInfo newInfo = new CharGroupInfo(newNodeAddress + 1, diff --git a/java/src/com/android/inputmethod/latin/utils/ByteArrayWrapper.java b/java/src/com/android/inputmethod/latin/utils/ByteArrayWrapper.java index 40f658203..1bb27aa2b 100644 --- a/java/src/com/android/inputmethod/latin/utils/ByteArrayWrapper.java +++ b/java/src/com/android/inputmethod/latin/utils/ByteArrayWrapper.java @@ -16,7 +16,7 @@ package com.android.inputmethod.latin.utils; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; /** * This class provides an implementation for the FusionDictionary buffer interface that is backed diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java index fc53ed142..a0ad27cfb 100644 --- a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java @@ -20,8 +20,7 @@ import android.util.Log; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils; -import com.android.inputmethod.latin.makedict.BinaryDictOutputUtils; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary; @@ -63,7 +62,7 @@ public final class UserHistoryDictIOUtils { final FormatOptions formatOptions) { final FusionDictionary fusionDict = constructFusionDictionary(dict, bigrams); try { - BinaryDictOutputUtils.writeDictionaryBinary(destination, fusionDict, formatOptions); + BinaryDictInputOutput.writeDictionaryBinary(destination, fusionDict, formatOptions); Log.d(TAG, "end writing"); } catch (IOException e) { Log.e(TAG, "IO exception while writing file", e); @@ -157,7 +156,7 @@ public final class UserHistoryDictIOUtils { continue; } to.setBigram(word1, word2, - BinaryDictInputUtils.reconstructBigramFrequency(unigramFrequency, + BinaryDictInputOutput.reconstructBigramFrequency(unigramFrequency, attr.mFrequency)); } } diff --git a/native/jni/src/suggest/core/dictionary/probability_utils.h b/native/jni/src/suggest/core/dictionary/probability_utils.h index 313ce28bb..f450087d8 100644 --- a/native/jni/src/suggest/core/dictionary/probability_utils.h +++ b/native/jni/src/suggest/core/dictionary/probability_utils.h @@ -41,7 +41,7 @@ class ProbabilityUtils { // the unigram probability to be the median value of the 17th step from the top. A value of // 0 for the bigram probability represents the middle of the 16th step from the top, // while a value of 15 represents the middle of the top step. - // See makedict.BinaryDictInputUtils for details. + // See makedict.BinaryDictInputOutput for details. const float stepSize = static_cast(MAX_PROBABILITY - unigramProbability) / (1.5f + MAX_BIGRAM_ENCODED_PROBABILITY); return unigramProbability diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java index 1aa0c7e7e..7bfd6032a 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOTests.java @@ -22,7 +22,7 @@ import android.test.suitebuilder.annotation.LargeTest; import android.util.Log; import android.util.SparseArray; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; @@ -44,7 +44,7 @@ import java.util.Random; import java.util.Set; /** - * Unit tests for BinaryDictInputUtils and BinaryDictOutputUtils. + * Unit tests for BinaryDictInputOutput */ @LargeTest public class BinaryDictIOTests extends AndroidTestCase { @@ -206,7 +206,7 @@ public class BinaryDictIOTests extends AndroidTestCase { // If you need to dump the dict to a textual file, uncomment the line below and the // function above // dumpToCombinedFileForDebug(file, "/tmp/foo"); - BinaryDictOutputUtils.writeDictionaryBinary(out, dict, formatOptions); + BinaryDictInputOutput.writeDictionaryBinary(out, dict, formatOptions); diff = System.currentTimeMillis() - now; out.flush(); @@ -272,7 +272,7 @@ public class BinaryDictIOTests extends AndroidTestCase { getBuffer(reader, bufferType); assertNotNull(reader.getBuffer()); now = System.currentTimeMillis(); - dict = BinaryDictInputUtils.readDictionaryBinary(reader, null); + dict = BinaryDictInputOutput.readDictionaryBinary(reader, null); diff = System.currentTimeMillis() - now; } catch (IOException e) { Log.e(TAG, "IOException while reading dictionary", e); @@ -383,7 +383,7 @@ public class BinaryDictIOTests extends AndroidTestCase { } actBigrams.get(word1).add(word2); - final int bigramFreq = BinaryDictInputUtils.reconstructBigramFrequency( + final int bigramFreq = BinaryDictInputOutput.reconstructBigramFrequency( unigramFreq, attr.mFrequency); assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); } @@ -497,14 +497,14 @@ public class BinaryDictIOTests extends AndroidTestCase { FileHeader header = null; try { - header = BinaryDictInputUtils.readHeader(buffer); + header = BinaryDictInputOutput.readHeader(buffer); } catch (IOException e) { return null; } catch (UnsupportedFormatException e) { return null; } if (header == null) return null; - return BinaryDictInputUtils.getWordAtAddress(buffer, header.mHeaderSize, + return BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize, address - header.mHeaderSize, header.mFormatOptions).mWord; } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java index 3fe25a16e..e75950703 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java @@ -21,8 +21,8 @@ import android.test.MoreAsserts; import android.test.suitebuilder.annotation.LargeTest; import android.util.Log; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils.ByteBufferWrapper; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.ByteBufferWrapper; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FusionDictionary.Node; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; @@ -114,10 +114,10 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { private static void printNode(final FusionDictionaryBufferInterface buffer, final FormatSpec.FormatOptions formatOptions) { Log.d(TAG, "Node at " + buffer.position()); - final int count = BinaryDictInputUtils.readCharGroupCount(buffer); + final int count = BinaryDictInputOutput.readCharGroupCount(buffer); Log.d(TAG, " charGroupCount = " + count); for (int i = 0; i < count; ++i) { - final CharGroupInfo currentInfo = BinaryDictInputUtils.readCharGroup(buffer, + final CharGroupInfo currentInfo = BinaryDictInputOutput.readCharGroup(buffer, buffer.position(), formatOptions); printCharGroup(currentInfo); } @@ -129,7 +129,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { private static void printBinaryFile(final FusionDictionaryBufferInterface buffer) throws IOException, UnsupportedFormatException { - FileHeader header = BinaryDictInputUtils.readHeader(buffer); + FileHeader header = BinaryDictInputOutput.readHeader(buffer); while (buffer.position() < buffer.limit()) { printNode(buffer, header.mFormatOptions); } @@ -252,8 +252,8 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { inStream = new FileInputStream(file); final FusionDictionaryBufferInterface buffer = new ByteBufferWrapper( inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, 0, file.length())); - final FileHeader header = BinaryDictInputUtils.readHeader(buffer); - assertEquals(word, BinaryDictInputUtils.getWordAtAddress(buffer, header.mHeaderSize, + final FileHeader header = BinaryDictInputOutput.readHeader(buffer); + assertEquals(word, BinaryDictInputOutput.getWordAtAddress(buffer, header.mHeaderSize, position - header.mHeaderSize, header.mFormatOptions).mWord); } catch (IOException e) { } catch (UnsupportedFormatException e) { @@ -283,7 +283,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { try { final FileOutputStream out = new FileOutputStream(file); - BinaryDictOutputUtils.writeDictionaryBinary(out, dict, FORMAT_OPTIONS); + BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS); out.close(); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); @@ -335,7 +335,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { try { final FileOutputStream out = new FileOutputStream(file); - BinaryDictOutputUtils.writeDictionaryBinary(out, dict, FORMAT_OPTIONS); + BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS); out.close(); } catch (IOException e) { fail("IOException while writing an initial dictionary : " + e); @@ -372,7 +372,7 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { try { final FileOutputStream out = new FileOutputStream(file); - BinaryDictOutputUtils.writeDictionaryBinary(out, dict, FORMAT_OPTIONS); + BinaryDictInputOutput.writeDictionaryBinary(out, dict, FORMAT_OPTIONS); out.close(); } catch (IOException e) { assertTrue(false); diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictReaderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictReaderTests.java index 9fe21af16..a46e5831b 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictReaderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictReaderTests.java @@ -16,7 +16,7 @@ package com.android.inputmethod.latin.makedict; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.BinaryDictReader.FusionDictionaryBufferFactory; import com.android.inputmethod.latin.makedict.BinaryDictReader. FusionDictionaryBufferFromByteArrayFactory; diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java index 45ee563b4..d0b460af0 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java @@ -16,7 +16,7 @@ package com.android.inputmethod.latin.dicttool; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; @@ -99,7 +99,7 @@ public final class BinaryDictOffdeviceUtils { // over and over, ending in a stack overflow. Hence we limit the depth at which we try // decoding the file. if (depth > MAX_DECODE_DEPTH) return null; - if (BinaryDictInputUtils.isBinaryDictionary(src)) { + if (BinaryDictInputOutput.isBinaryDictionary(src)) { spec.mFile = src; return spec; } @@ -194,7 +194,7 @@ public final class BinaryDictOffdeviceUtils { System.out.println("Packaging : " + decodedSpec.describeChain()); System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); } - return BinaryDictInputUtils.readDictionaryBinary(reader, null); + return BinaryDictInputOutput.readDictionaryBinary(reader, null); } } } catch (IOException e) { diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java index d936bd48e..9bce988ac 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java @@ -16,8 +16,7 @@ package com.android.inputmethod.latin.dicttool; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils; -import com.android.inputmethod.latin.makedict.BinaryDictOutputUtils; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; @@ -178,7 +177,7 @@ public class DictionaryMaker { inputUnigramXml = filename; } else if (CombinedInputOutput.isCombinedDictionary(filename)) { inputCombined = filename; - } else if (BinaryDictInputUtils.isBinaryDictionary(filename)) { + } else if (BinaryDictInputOutput.isBinaryDictionary(filename)) { inputBinary = filename; } else { throw new IllegalArgumentException( @@ -200,7 +199,7 @@ public class DictionaryMaker { } } else { if (null == inputBinary && null == inputUnigramXml) { - if (BinaryDictInputUtils.isBinaryDictionary(arg)) { + if (BinaryDictInputOutput.isBinaryDictionary(arg)) { inputBinary = arg; } else if (CombinedInputOutput.isCombinedDictionary(arg)) { inputCombined = arg; @@ -270,7 +269,7 @@ public class DictionaryMaker { final File file = new File(binaryFilename); final BinaryDictReader reader = new BinaryDictReader(file); reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); - return BinaryDictInputUtils.readDictionaryBinary(reader, null); + return BinaryDictInputOutput.readDictionaryBinary(reader, null); } /** @@ -359,7 +358,7 @@ public class DictionaryMaker { throws FileNotFoundException, IOException, UnsupportedFormatException { final File outputFile = new File(outputFilename); final FormatSpec.FormatOptions formatOptions = new FormatSpec.FormatOptions(version); - BinaryDictOutputUtils.writeDictionaryBinary(new FileOutputStream(outputFilename), dict, + BinaryDictInputOutput.writeDictionaryBinary(new FileOutputStream(outputFilename), dict, formatOptions); } diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java index d0e6b9faa..fb1cc8493 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java @@ -16,8 +16,7 @@ package com.android.inputmethod.latin.dicttool; -import com.android.inputmethod.latin.makedict.BinaryDictInputUtils; -import com.android.inputmethod.latin.makedict.BinaryDictOutputUtils; +import com.android.inputmethod.latin.makedict.BinaryDictInputOutput; import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary; @@ -62,7 +61,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { Compress.getCompressedStream( new BufferedOutputStream(new FileOutputStream(dst))))); - BinaryDictOutputUtils.writeDictionaryBinary(out, dict, new FormatOptions(2, false)); + BinaryDictInputOutput.writeDictionaryBinary(out, dict, new FormatOptions(2, false)); // Test for an actually compressed dictionary and its contents final BinaryDictOffdeviceUtils.DecoderChainSpec decodeSpec = @@ -73,7 +72,7 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size()); final BinaryDictReader reader = new BinaryDictReader(decodeSpec.mFile); reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); - final FusionDictionary resultDict = BinaryDictInputUtils.readDictionaryBinary(reader, + final FusionDictionary resultDict = BinaryDictInputOutput.readDictionaryBinary(reader, null /* dict : an optional dictionary to add words to, or null */); assertEquals("Dictionary can't be read back correctly", resultDict.findWordInTree(resultDict.mRoot, "foo").getFrequency(), TEST_FREQ); diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictInputOutputTest.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictInputOutputTest.java index caad1c5f6..096902879 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictInputOutputTest.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/BinaryDictInputOutputTest.java @@ -25,7 +25,7 @@ import java.util.ArrayList; import java.util.HashMap; /** - * Unit tests for BinaryDictInputUtils and BinaryDictOutputUtils. + * Unit tests for BinaryDictInputOutput. */ public class BinaryDictInputOutputTest extends TestCase { // Test the flattened array contains the expected number of nodes, and @@ -39,7 +39,7 @@ public class BinaryDictInputOutputTest extends TestCase { dict.add("ftb", 1, null, false /* isNotAWord */); dict.add("bar", 1, null, false /* isNotAWord */); dict.add("fool", 1, null, false /* isNotAWord */); - final ArrayList result = BinaryDictOutputUtils.flattenTree(dict.mRoot); + final ArrayList result = BinaryDictInputOutput.flattenTree(dict.mRoot); assertEquals(4, result.size()); while (!result.isEmpty()) { final Node n = result.remove(0); diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java index a1ca2f451..76071133d 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/makedict/FusionDictionaryTest.java @@ -29,7 +29,7 @@ import java.util.HashMap; import java.util.Random; /** - * Unit tests for BinaryDictInputUtils and BinaryDictOutputUtils. + * Unit tests for BinaryDictInputOutput. */ public class FusionDictionaryTest extends TestCase { private static final ArrayList sWords = new ArrayList();