From 77bce05e6f6e3a988253f9305ae22e51f56f5b1a Mon Sep 17 00:00:00 2001 From: Yuichiro Hanada Date: Mon, 19 Aug 2013 14:49:57 +0900 Subject: [PATCH] [Refactor] Rename BinaryDictReader and BinaryDictDecoder. BinaryDictReader -> BinaryDictDecoder. BinaryDictDecoder -> BinaryDictDecoderUtils. Change-Id: Iadf2153b379b760538ecda488dda4f17225e5f37 --- .../latin/BinaryDictionaryGetter.java | 14 +- .../latin/makedict/BinaryDictDecoder.java | 878 +++--------------- .../makedict/BinaryDictDecoderUtils.java | 777 ++++++++++++++++ .../latin/makedict/BinaryDictEncoder.java | 2 +- .../latin/makedict/BinaryDictIOUtils.java | 134 +-- .../latin/makedict/BinaryDictReader.java | 169 ---- .../makedict/DynamicBinaryDictIOUtils.java | 183 ++-- ...ReaderInterface.java => HeaderReader.java} | 2 +- .../DynamicPredictionDictionaryBase.java | 6 +- ...yWrapper.java => ByteArrayDictBuffer.java} | 6 +- .../latin/utils/UserHistoryDictIOUtils.java | 7 +- .../BinaryDictDecoderEncoderTests.java | 87 +- ...Tests.java => BinaryDictDecoderTests.java} | 59 +- .../makedict/BinaryDictIOUtilsTests.java | 62 +- .../utils/UserHistoryDictIOUtilsTests.java | 9 +- tools/dicttool/Android.mk | 2 +- .../dicttool/BinaryDictOffdeviceUtils.java | 12 +- .../latin/dicttool/DictionaryMaker.java | 15 +- .../BinaryDictOffdeviceUtilsTests.java | 9 +- 19 files changed, 1231 insertions(+), 1202 deletions(-) create mode 100644 java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java delete mode 100644 java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java rename java/src/com/android/inputmethod/latin/makedict/decoder/{HeaderReaderInterface.java => HeaderReader.java} (96%) rename java/src/com/android/inputmethod/latin/utils/{ByteArrayWrapper.java => ByteArrayDictBuffer.java} (89%) rename tests/src/com/android/inputmethod/latin/makedict/{BinaryDictReaderTests.java => BinaryDictDecoderTests.java} (67%) diff --git a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java 
b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java index 7e497e9b9..f9f22ecb4 100644 --- a/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java +++ b/java/src/com/android/inputmethod/latin/BinaryDictionaryGetter.java @@ -21,7 +21,7 @@ import android.content.SharedPreferences; import android.content.res.AssetFileDescriptor; import android.util.Log; -import com.android.inputmethod.latin.makedict.BinaryDictDecoder; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.DictionaryInfoUtils; @@ -231,17 +231,17 @@ final public class BinaryDictionaryGetter { try { // Read the version of the file inStream = new FileInputStream(f); - final BinaryDictDecoder.ByteBufferWrapper buffer = - new BinaryDictDecoder.ByteBufferWrapper(inStream.getChannel().map( + final BinaryDictDecoderUtils.ByteBufferDictBuffer dictBuffer = + new BinaryDictDecoderUtils.ByteBufferDictBuffer(inStream.getChannel().map( FileChannel.MapMode.READ_ONLY, 0, f.length())); - final int magic = buffer.readInt(); + final int magic = dictBuffer.readInt(); if (magic != FormatSpec.MAGIC_NUMBER) { return false; } - final int formatVersion = buffer.readInt(); - final int headerSize = buffer.readInt(); + final int formatVersion = dictBuffer.readInt(); + final int headerSize = dictBuffer.readInt(); final HashMap options = CollectionUtils.newHashMap(); - BinaryDictDecoder.populateOptions(buffer, headerSize, options); + BinaryDictDecoderUtils.populateOptions(dictBuffer, headerSize, options); final String version = options.get(VERSION_KEY); if (null == version) { diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java index 5e3d6d22d..b86dfe552 100644 --- 
a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoder.java @@ -17,35 +17,23 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; -import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; -import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; -import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; -import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; +import com.android.inputmethod.latin.makedict.decoder.HeaderReader; +import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import com.android.inputmethod.latin.utils.JniUtils; -import java.io.ByteArrayOutputStream; import java.io.File; import java.io.FileInputStream; import java.io.FileNotFoundException; import java.io.IOException; +import java.io.RandomAccessFile; import java.nio.ByteBuffer; import java.nio.channels.FileChannel; -import java.util.ArrayList; -import java.util.Arrays; import java.util.HashMap; -import java.util.Map; -import java.util.TreeMap; -/** - * Decodes binary files for a FusionDictionary. - * - * All the methods in this class are static. 
- */ -public final class BinaryDictDecoder { - - private static final boolean DBG = MakedictLog.DBG; +@UsedForTesting +public class BinaryDictDecoder implements HeaderReader { static { JniUtils.loadNativeLibrary(); @@ -54,742 +42,148 @@ public final class BinaryDictDecoder { // TODO: implement something sensical instead of just a phony method private static native int doNothing(); - private BinaryDictDecoder() { - // This utility class is not publicly instantiable. - } - - private static final int MAX_JUMPS = 12; - - @UsedForTesting - public interface FusionDictionaryBufferInterface { - public int readUnsignedByte(); - public int readUnsignedShort(); - public int readUnsignedInt24(); - public int readInt(); - public int position(); - public void position(int newPosition); - public void put(final byte b); - public int limit(); - @UsedForTesting - public int capacity(); - } - - public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface { - private ByteBuffer mBuffer; - - public ByteBufferWrapper(final ByteBuffer buffer) { - mBuffer = buffer; - } - - @Override - public int readUnsignedByte() { - return mBuffer.get() & 0xFF; - } - - @Override - public int readUnsignedShort() { - return mBuffer.getShort() & 0xFFFF; - } - - @Override - public int readUnsignedInt24() { - final int retval = readUnsignedByte(); - return (retval << 16) + readUnsignedShort(); - } - - @Override - public int readInt() { - return mBuffer.getInt(); - } - - @Override - public int position() { - return mBuffer.position(); - } - - @Override - public void position(int newPos) { - mBuffer.position(newPos); - } - - @Override - public void put(final byte b) { - mBuffer.put(b); - } - - @Override - public int limit() { - return mBuffer.limit(); - } - - @Override - public int capacity() { - return mBuffer.capacity(); - } + public interface DictionaryBufferFactory { + public DictBuffer getDictionaryBuffer(final File file) + throws FileNotFoundException, IOException; } /** - * A 
class grouping utility function for our specific character encoding. - */ - static final class CharEncoding { - private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; - private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF; - - /** - * Helper method to find out whether this code fits on one byte - */ - private static boolean fitsOnOneByte(final int character) { - return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE - && character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE; - } - - /** - * Compute the size of a character given its character code. - * - * Char format is: - * 1 byte = bbbbbbbb match - * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte - * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because - * unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with - * 00011111 would be outside unicode. - * else: iso-latin-1 code - * This allows for the whole unicode range to be encoded, including chars outside of - * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control - * characters which should never happen anyway (and still work, but take 3 bytes). - * - * @param character the character code. - * @return the size in binary encoded-form, either 1 or 3 bytes. - */ - static int getCharSize(final int character) { - // See char encoding in FusionDictionary.java - if (fitsOnOneByte(character)) return 1; - if (FormatSpec.INVALID_CHARACTER == character) return 1; - return 3; - } - - /** - * Compute the byte size of a character array. - */ - static int getCharArraySize(final int[] chars) { - int size = 0; - for (int character : chars) size += getCharSize(character); - return size; - } - - /** - * Writes a char array to a byte buffer. - * - * @param codePoints the code point array to write. - * @param buffer the byte buffer to write to. - * @param index the index in buffer to write the character array to. - * @return the index after the last character. 
- */ - static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) { - for (int codePoint : codePoints) { - if (1 == getCharSize(codePoint)) { - buffer[index++] = (byte)codePoint; - } else { - buffer[index++] = (byte)(0xFF & (codePoint >> 16)); - buffer[index++] = (byte)(0xFF & (codePoint >> 8)); - buffer[index++] = (byte)(0xFF & codePoint); - } - } - return index; - } - - /** - * Writes a string with our character format to a byte buffer. - * - * This will also write the terminator byte. - * - * @param buffer the byte buffer to write to. - * @param origin the offset to write from. - * @param word the string to write. - * @return the size written, in bytes. - */ - static int writeString(final byte[] buffer, final int origin, - final String word) { - final int length = word.length(); - int index = origin; - for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { - final int codePoint = word.codePointAt(i); - if (1 == getCharSize(codePoint)) { - buffer[index++] = (byte)codePoint; - } else { - buffer[index++] = (byte)(0xFF & (codePoint >> 16)); - buffer[index++] = (byte)(0xFF & (codePoint >> 8)); - buffer[index++] = (byte)(0xFF & codePoint); - } - } - buffer[index++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR; - return index - origin; - } - - /** - * Writes a string with our character format to a ByteArrayOutputStream. - * - * This will also write the terminator byte. - * - * @param buffer the ByteArrayOutputStream to write to. - * @param word the string to write. 
- */ - static void writeString(final ByteArrayOutputStream buffer, final String word) { - final int length = word.length(); - for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { - final int codePoint = word.codePointAt(i); - if (1 == getCharSize(codePoint)) { - buffer.write((byte) codePoint); - } else { - buffer.write((byte) (0xFF & (codePoint >> 16))); - buffer.write((byte) (0xFF & (codePoint >> 8))); - buffer.write((byte) (0xFF & codePoint)); - } - } - buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR); - } - - /** - * Reads a string from a buffer. This is the converse of the above method. - */ - static String readString(final FusionDictionaryBufferInterface buffer) { - final StringBuilder s = new StringBuilder(); - int character = readChar(buffer); - while (character != FormatSpec.INVALID_CHARACTER) { - s.appendCodePoint(character); - character = readChar(buffer); - } - return s.toString(); - } - - /** - * Reads a character from the buffer. - * - * This follows the character format documented earlier in this source file. - * - * @param buffer the buffer, positioned over an encoded character. - * @return the character code. - */ - static int readChar(final FusionDictionaryBufferInterface buffer) { - int character = buffer.readUnsignedByte(); - if (!fitsOnOneByte(character)) { - if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == character) { - return FormatSpec.INVALID_CHARACTER; - } - character <<= 16; - character += buffer.readUnsignedShort(); - } - return character; - } - } - - // Input methods: Read a binary dictionary to memory. - // readDictionaryBinary is the public entry point for them. 
- - static int readChildrenAddress(final FusionDictionaryBufferInterface buffer, - final int optionFlags, final FormatOptions options) { - if (options.mSupportsDynamicUpdate) { - final int address = buffer.readUnsignedInt24(); - if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; - if ((address & FormatSpec.MSB24) != 0) { - return -(address & FormatSpec.SINT24_MAX); - } else { - return address; - } - } - int address; - switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) { - case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: - return buffer.readUnsignedByte(); - case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: - return buffer.readUnsignedShort(); - case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: - return buffer.readUnsignedInt24(); - case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS: - default: - return FormatSpec.NO_CHILDREN_ADDRESS; - } - } - - static int readParentAddress(final FusionDictionaryBufferInterface buffer, - final FormatOptions formatOptions) { - if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { - final int parentAddress = buffer.readUnsignedInt24(); - final int sign = ((parentAddress & FormatSpec.MSB24) != 0) ? 
-1 : 1; - return sign * (parentAddress & FormatSpec.SINT24_MAX); - } else { - return FormatSpec.NO_PARENT_ADDRESS; - } - } - - private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH]; - public static CharGroupInfo readCharGroup(final FusionDictionaryBufferInterface buffer, - final int originalGroupAddress, final FormatOptions options) { - int addressPointer = originalGroupAddress; - final int flags = buffer.readUnsignedByte(); - ++addressPointer; - - final int parentAddress = readParentAddress(buffer, options); - if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { - addressPointer += 3; - } - - final int characters[]; - if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { - int index = 0; - int character = CharEncoding.readChar(buffer); - addressPointer += CharEncoding.getCharSize(character); - while (-1 != character) { - // FusionDictionary is making sure that the length of the word is smaller than - // MAX_WORD_LENGTH. - // So we'll never write past the end of CHARACTER_BUFFER. 
- CHARACTER_BUFFER[index++] = character; - character = CharEncoding.readChar(buffer); - addressPointer += CharEncoding.getCharSize(character); - } - characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index); - } else { - final int character = CharEncoding.readChar(buffer); - addressPointer += CharEncoding.getCharSize(character); - characters = new int[] { character }; - } - final int frequency; - if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { - ++addressPointer; - frequency = buffer.readUnsignedByte(); - } else { - frequency = CharGroup.NOT_A_TERMINAL; - } - int childrenAddress = readChildrenAddress(buffer, flags, options); - if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { - childrenAddress += addressPointer; - } - addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); - ArrayList shortcutTargets = null; - if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) { - final int pointerBefore = buffer.position(); - shortcutTargets = new ArrayList(); - buffer.readUnsignedShort(); // Skip the size - while (true) { - final int targetFlags = buffer.readUnsignedByte(); - final String word = CharEncoding.readString(buffer); - shortcutTargets.add(new WeightedString(word, - targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY)); - if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; - } - addressPointer += buffer.position() - pointerBefore; - } - ArrayList bigrams = null; - if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { - bigrams = new ArrayList(); - int bigramCount = 0; - while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { - final int bigramFlags = buffer.readUnsignedByte(); - ++addressPointer; - final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE) - ? 
1 : -1; - int bigramAddress = addressPointer; - switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) { - case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: - bigramAddress += sign * buffer.readUnsignedByte(); - addressPointer += 1; - break; - case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: - bigramAddress += sign * buffer.readUnsignedShort(); - addressPointer += 2; - break; - case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: - final int offset = (buffer.readUnsignedByte() << 16) - + buffer.readUnsignedShort(); - bigramAddress += sign * offset; - addressPointer += 3; - break; - default: - throw new RuntimeException("Has bigrams with no address"); - } - bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY, - bigramAddress)); - if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; - } - if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { - MakedictLog.d("too many bigrams in a group."); - } - } - return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency, - parentAddress, childrenAddress, shortcutTargets, bigrams); - } - - /** - * Reads and returns the char group count out of a buffer and forwards the pointer. - */ - public static int readCharGroupCount(final FusionDictionaryBufferInterface buffer) { - final int msb = buffer.readUnsignedByte(); - if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= msb) { - return msb; - } else { - return ((FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & msb) << 8) - + buffer.readUnsignedByte(); - } - } - - /** - * Finds, as a string, the word at the address passed as an argument. + * Creates DictionaryBuffer using a ByteBuffer * - * @param buffer the buffer to read from. - * @param headerSize the size of the header. - * @param address the address to seek. - * @param formatOptions file format options. - * @return the word with its frequency, as a weighted string. 
+ * This class uses less memory than DictionaryBufferFromByteArrayFactory, + * but doesn't perform as fast. + * When operating on a big dictionary, this class is preferred. */ - /* package for tests */ static WeightedString getWordAtAddress( - final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, - final FormatOptions formatOptions) { - final WeightedString result; - final int originalPointer = buffer.position(); - buffer.position(address); - - if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { - result = getWordAtAddressWithParentAddress(buffer, headerSize, address, formatOptions); - } else { - result = getWordAtAddressWithoutParentAddress(buffer, headerSize, address, - formatOptions); - } - - buffer.position(originalPointer); - return result; - } - - @SuppressWarnings("unused") - private static WeightedString getWordAtAddressWithParentAddress( - final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, - final FormatOptions options) { - int currentAddress = address; - int frequency = Integer.MIN_VALUE; - final StringBuilder builder = new StringBuilder(); - // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH - for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) { - CharGroupInfo currentInfo; - int loopCounter = 0; - do { - buffer.position(currentAddress + headerSize); - currentInfo = readCharGroup(buffer, currentAddress, options); - if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) { - currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; - } - if (DBG && loopCounter++ > MAX_JUMPS) { - MakedictLog.d("Too many jumps - probably a bug"); - } - } while (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)); - if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency; - builder.insert(0, - new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length)); - if (currentInfo.mParentAddress == 
FormatSpec.NO_PARENT_ADDRESS) break; - currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; - } - return new WeightedString(builder.toString(), frequency); - } - - private static WeightedString getWordAtAddressWithoutParentAddress( - final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, - final FormatOptions options) { - buffer.position(headerSize); - final int count = readCharGroupCount(buffer); - int groupOffset = BinaryDictIOUtils.getGroupCountSize(count); - final StringBuilder builder = new StringBuilder(); - WeightedString result = null; - - CharGroupInfo last = null; - for (int i = count - 1; i >= 0; --i) { - CharGroupInfo info = readCharGroup(buffer, groupOffset, options); - groupOffset = info.mEndAddress; - if (info.mOriginalAddress == address) { - builder.append(new String(info.mCharacters, 0, info.mCharacters.length)); - result = new WeightedString(builder.toString(), info.mFrequency); - break; // and return - } - if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { - if (info.mChildrenAddress > address) { - if (null == last) continue; - builder.append(new String(last.mCharacters, 0, last.mCharacters.length)); - buffer.position(last.mChildrenAddress + headerSize); - i = readCharGroupCount(buffer); - groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i); - last = null; - continue; - } - last = info; - } - if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) { - builder.append(new String(last.mCharacters, 0, last.mCharacters.length)); - buffer.position(last.mChildrenAddress + headerSize); - i = readCharGroupCount(buffer); - groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i); - last = null; - continue; - } - } - return result; - } - - /** - * Reads a single node array from a buffer. - * - * This methods reads the file at the current position. A node array is fully expected to start - * at the current position. 
- * This will recursively read other node arrays into the structure, populating the reverse - * maps on the fly and using them to keep track of already read nodes. - * - * @param buffer the buffer, correctly positioned at the start of a node array. - * @param headerSize the size, in bytes, of the file header. - * @param reverseNodeArrayMap a mapping from addresses to already read node arrays. - * @param reverseGroupMap a mapping from addresses to already read character groups. - * @param options file format options. - * @return the read node array with all his children already read. - */ - private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer, - final int headerSize, final Map reverseNodeArrayMap, - final Map reverseGroupMap, final FormatOptions options) - throws IOException { - final ArrayList nodeArrayContents = new ArrayList(); - final int nodeArrayOrigin = buffer.position() - headerSize; - - do { // Scan the linked-list node. - final int nodeArrayHeadPosition = buffer.position() - headerSize; - final int count = readCharGroupCount(buffer); - int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count); - for (int i = count; i > 0; --i) { // Scan the array of CharGroup. 
- CharGroupInfo info = readCharGroup(buffer, groupOffset, options); - if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue; - ArrayList shortcutTargets = info.mShortcutTargets; - ArrayList bigrams = null; - if (null != info.mBigrams) { - bigrams = new ArrayList(); - for (PendingAttribute bigram : info.mBigrams) { - final WeightedString word = getWordAtAddress( - buffer, headerSize, bigram.mAddress, options); - final int reconstructedFrequency = - reconstructBigramFrequency(word.mFrequency, bigram.mFrequency); - bigrams.add(new WeightedString(word.mWord, reconstructedFrequency)); - } - } - if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { - PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress); - if (null == children) { - final int currentPosition = buffer.position(); - buffer.position(info.mChildrenAddress + headerSize); - children = readNodeArray( - buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options); - buffer.position(currentPosition); - } - nodeArrayContents.add( - new CharGroup(info.mCharacters, shortcutTargets, bigrams, - info.mFrequency, - 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), - 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children)); - } else { - nodeArrayContents.add( - new CharGroup(info.mCharacters, shortcutTargets, bigrams, - info.mFrequency, - 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), - 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED))); - } - groupOffset = info.mEndAddress; - } - - // reach the end of the array. 
- if (options.mSupportsDynamicUpdate) { - final int nextAddress = buffer.readUnsignedInt24(); - if (nextAddress >= 0 && nextAddress < buffer.limit()) { - buffer.position(nextAddress); - } else { - break; - } - } - } while (options.mSupportsDynamicUpdate && - buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS); - - final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents); - nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin; - nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin; - reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray); - return nodeArray; - } - - /** - * Helper function to get the binary format version from the header. - * @throws IOException - */ - private static int getFormatVersion(final FusionDictionaryBufferInterface buffer) - throws IOException { - final int magic = buffer.readInt(); - if (FormatSpec.MAGIC_NUMBER == magic) return buffer.readUnsignedShort(); - return FormatSpec.NOT_A_VERSION_NUMBER; - } - - /** - * Helper function to get and validate the binary format version. - * @throws UnsupportedFormatException - * @throws IOException - */ - static int checkFormatVersion(final FusionDictionaryBufferInterface buffer) - throws IOException, UnsupportedFormatException { - final int version = getFormatVersion(buffer); - if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION - || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) { - throw new UnsupportedFormatException("This file has version " + version - + ", but this implementation does not support versions above " - + FormatSpec.MAXIMUM_SUPPORTED_VERSION); - } - return version; - } - - /** - * Reads a header from a buffer. 
- * @param headerReader the header reader - * @throws IOException - * @throws UnsupportedFormatException - */ - public static FileHeader readHeader(final HeaderReaderInterface headerReader) - throws IOException, UnsupportedFormatException { - final int version = headerReader.readVersion(); - final int optionsFlags = headerReader.readOptionFlags(); - - final int headerSize = headerReader.readHeaderSize(); - - if (headerSize < 0) { - throw new UnsupportedFormatException("header size can't be negative."); - } - - final HashMap attributes = headerReader.readAttributes(headerSize); - - final FileHeader header = new FileHeader(headerSize, - new FusionDictionary.DictionaryOptions(attributes, - 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), - 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), - new FormatOptions(version, - 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); - return header; - } - - /** - * Reads options from a buffer and populate a map with their contents. - * - * The buffer is read at the current position, so the caller must take care the pointer - * is in the right place before calling this. - */ - public static void populateOptions(final FusionDictionaryBufferInterface buffer, - final int headerSize, final HashMap options) { - while (buffer.position() < headerSize) { - final String key = CharEncoding.readString(buffer); - final String value = CharEncoding.readString(buffer); - options.put(key, value); - } - } - - /** - * Reads a buffer and returns the memory representation of the dictionary. - * - * This high-level method takes a buffer and reads its contents, populating a - * FusionDictionary structure. The optional dict argument is an existing dictionary to - * which words from the buffer should be added. If it is null, a new dictionary is created. - * - * @param reader the reader. - * @param dict an optional dictionary to add words to, or null. - * @return the created (or merged) dictionary. 
- */ - @UsedForTesting - public static FusionDictionary readDictionaryBinary(final BinaryDictReader reader, - final FusionDictionary dict) throws FileNotFoundException, IOException, - UnsupportedFormatException { - - // if the buffer has not been opened, open the buffer with bytebuffer. - if (reader.getBuffer() == null) reader.openBuffer( - new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); - if (reader.getBuffer() == null) { - MakedictLog.e("Cannot open the buffer"); - } - - // Read header - final FileHeader fileHeader = readHeader(reader); - - Map reverseNodeArrayMapping = new TreeMap(); - Map reverseGroupMapping = new TreeMap(); - final PtNodeArray root = readNodeArray(reader.getBuffer(), fileHeader.mHeaderSize, - reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions); - - FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions); - if (null != dict) { - for (final Word w : dict) { - if (w.mIsBlacklistEntry) { - newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord); - } else { - newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord); - } - } - for (final Word w : dict) { - // By construction a binary dictionary may not have bigrams pointing to - // words that are not also registered as unigrams so we don't have to avoid - // them explicitly here. - for (final WeightedString bigram : w.mBigrams) { - newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency); - } - } - } - - return newDict; - } - - /** - * Helper method to pass a file name instead of a File object to isBinaryDictionary. - */ - public static boolean isBinaryDictionary(final String filename) { - final File file = new File(filename); - return isBinaryDictionary(file); - } - - /** - * Basic test to find out whether the file is a binary dictionary or not. - * - * Concretely this only tests the magic number. - * - * @param file The file to test. 
- * @return true if it's a binary dictionary, false otherwise - */ - public static boolean isBinaryDictionary(final File file) { - FileInputStream inStream = null; - try { - inStream = new FileInputStream(file); - final ByteBuffer buffer = inStream.getChannel().map( - FileChannel.MapMode.READ_ONLY, 0, file.length()); - final int version = getFormatVersion(new ByteBufferWrapper(buffer)); - return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION - && version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION); - } catch (FileNotFoundException e) { - return false; - } catch (IOException e) { - return false; - } finally { - if (inStream != null) { - try { + public static final class DictionaryBufferFromReadOnlyByteBufferFactory + implements DictionaryBufferFactory { + @Override + public DictBuffer getDictionaryBuffer(final File file) + throws FileNotFoundException, IOException { + FileInputStream inStream = null; + ByteBuffer buffer = null; + try { + inStream = new FileInputStream(file); + buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, + 0, file.length()); + } finally { + if (inStream != null) { + inStream.close(); + } + } + if (buffer != null) { + return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer); + } + return null; + } + } + + /** + * Creates DictionaryBuffer using a byte array + * + * This class performs faster than other classes, but consumes more memory. + * When operating on a small dictionary, this class is preferred. 
+ */ + public static final class DictionaryBufferFromByteArrayFactory + implements DictionaryBufferFactory { + @Override + public DictBuffer getDictionaryBuffer(final File file) + throws FileNotFoundException, IOException { + FileInputStream inStream = null; + try { + inStream = new FileInputStream(file); + final byte[] array = new byte[(int) file.length()]; + inStream.read(array); + return new ByteArrayDictBuffer(array); + } finally { + if (inStream != null) { inStream.close(); - } catch (IOException e) { - // do nothing } } } } /** - * Calculate bigram frequency from compressed value + * Creates DictionaryBuffer using a writable ByteBuffer and a RandomAccessFile. * - * @param unigramFrequency - * @param bigramFrequency compressed frequency - * @return approximate bigram frequency + * This class doesn't perform as fast as other classes, + * but this class is the only option available for destructive operations (insert or delete) + * on a dictionary. */ - public static int reconstructBigramFrequency(final int unigramFrequency, - final int bigramFrequency) { - final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency) - / (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY); - final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f); - return (int)resultFreqFloat; + @UsedForTesting + public static final class DictionaryBufferFromWritableByteBufferFactory + implements DictionaryBufferFactory { + @Override + public DictBuffer getDictionaryBuffer(final File file) + throws FileNotFoundException, IOException { + RandomAccessFile raFile = null; + ByteBuffer buffer = null; + try { + raFile = new RandomAccessFile(file, "rw"); + buffer = raFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, file.length()); + } finally { + if (raFile != null) { + raFile.close(); + } + } + if (buffer != null) { + return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer); + } + return null; + } + } + + private final File mDictionaryBinaryFile; + 
private DictBuffer mDictBuffer; + + public BinaryDictDecoder(final File file) { + mDictionaryBinaryFile = file; + mDictBuffer = null; + } + + public void openDictBuffer(final DictionaryBufferFactory factory) + throws FileNotFoundException, IOException { + mDictBuffer = factory.getDictionaryBuffer(mDictionaryBinaryFile); + } + + public DictBuffer getDictBuffer() { + return mDictBuffer; + } + + @UsedForTesting + public DictBuffer openAndGetDictBuffer( + final DictionaryBufferFactory factory) + throws FileNotFoundException, IOException { + openDictBuffer(factory); + return getDictBuffer(); + } + + // The implementation of HeaderReader + @Override + public int readVersion() throws IOException, UnsupportedFormatException { + return BinaryDictDecoderUtils.checkFormatVersion(mDictBuffer); + } + + @Override + public int readOptionFlags() { + return mDictBuffer.readUnsignedShort(); + } + + @Override + public int readHeaderSize() { + return mDictBuffer.readInt(); + } + + @Override + public HashMap readAttributes(final int headerSize) { + final HashMap attributes = new HashMap(); + while (mDictBuffer.position() < headerSize) { + // We can avoid infinite loop here since mFusionDictonary.position() is always increased + // by calling CharEncoding.readString. 
+ final String key = CharEncoding.readString(mDictBuffer); + final String value = CharEncoding.readString(mDictBuffer); + attributes.put(key, value); + } + mDictBuffer.position(headerSize); + return attributes; } } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java new file mode 100644 index 000000000..efa491099 --- /dev/null +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderUtils.java @@ -0,0 +1,777 @@ +/* + * Copyright (C) 2013 The Android Open Source Project + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package com.android.inputmethod.latin.makedict; + +import com.android.inputmethod.annotations.UsedForTesting; +import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; +import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; +import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; +import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; +import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; +import com.android.inputmethod.latin.makedict.decoder.HeaderReader; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.channels.FileChannel; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; +import java.util.TreeMap; + +/** + * Decodes binary files for a FusionDictionary. + * + * All the methods in this class are static. + * + * TODO: Remove calls from classes except BinaryDictDecoder + * TODO: Move this file to makedict/internal. + */ +public final class BinaryDictDecoderUtils { + + private static final boolean DBG = MakedictLog.DBG; + + private BinaryDictDecoderUtils() { + // This utility class is not publicly instantiable. 
+ } + + private static final int MAX_JUMPS = 12; + + @UsedForTesting + public interface DictBuffer { + public int readUnsignedByte(); + public int readUnsignedShort(); + public int readUnsignedInt24(); + public int readInt(); + public int position(); + public void position(int newPosition); + public void put(final byte b); + public int limit(); + @UsedForTesting + public int capacity(); + } + + public static final class ByteBufferDictBuffer implements DictBuffer { + private ByteBuffer mBuffer; + + public ByteBufferDictBuffer(final ByteBuffer buffer) { + mBuffer = buffer; + } + + @Override + public int readUnsignedByte() { + return mBuffer.get() & 0xFF; + } + + @Override + public int readUnsignedShort() { + return mBuffer.getShort() & 0xFFFF; + } + + @Override + public int readUnsignedInt24() { + final int retval = readUnsignedByte(); + return (retval << 16) + readUnsignedShort(); + } + + @Override + public int readInt() { + return mBuffer.getInt(); + } + + @Override + public int position() { + return mBuffer.position(); + } + + @Override + public void position(int newPos) { + mBuffer.position(newPos); + } + + @Override + public void put(final byte b) { + mBuffer.put(b); + } + + @Override + public int limit() { + return mBuffer.limit(); + } + + @Override + public int capacity() { + return mBuffer.capacity(); + } + } + + /** + * A class grouping utility function for our specific character encoding. + */ + static final class CharEncoding { + private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20; + private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF; + + /** + * Helper method to find out whether this code fits on one byte + */ + private static boolean fitsOnOneByte(final int character) { + return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE + && character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE; + } + + /** + * Compute the size of a character given its character code. 
+ * + * Char format is: + * 1 byte = bbbbbbbb match + * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte + * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because + * unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with + * 00011111 would be outside unicode. + * else: iso-latin-1 code + * This allows for the whole unicode range to be encoded, including chars outside of + * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control + * characters which should never happen anyway (and still work, but take 3 bytes). + * + * @param character the character code. + * @return the size in binary encoded-form, either 1 or 3 bytes. + */ + static int getCharSize(final int character) { + // See char encoding in FusionDictionary.java + if (fitsOnOneByte(character)) return 1; + if (FormatSpec.INVALID_CHARACTER == character) return 1; + return 3; + } + + /** + * Compute the byte size of a character array. + */ + static int getCharArraySize(final int[] chars) { + int size = 0; + for (int character : chars) size += getCharSize(character); + return size; + } + + /** + * Writes a char array to a byte buffer. + * + * @param codePoints the code point array to write. + * @param buffer the byte buffer to write to. + * @param index the index in buffer to write the character array to. + * @return the index after the last character. + */ + static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) { + for (int codePoint : codePoints) { + if (1 == getCharSize(codePoint)) { + buffer[index++] = (byte)codePoint; + } else { + buffer[index++] = (byte)(0xFF & (codePoint >> 16)); + buffer[index++] = (byte)(0xFF & (codePoint >> 8)); + buffer[index++] = (byte)(0xFF & codePoint); + } + } + return index; + } + + /** + * Writes a string with our character format to a byte buffer. + * + * This will also write the terminator byte. + * + * @param buffer the byte buffer to write to. 
+ * @param origin the offset to write from. + * @param word the string to write. + * @return the size written, in bytes. + */ + static int writeString(final byte[] buffer, final int origin, + final String word) { + final int length = word.length(); + int index = origin; + for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { + final int codePoint = word.codePointAt(i); + if (1 == getCharSize(codePoint)) { + buffer[index++] = (byte)codePoint; + } else { + buffer[index++] = (byte)(0xFF & (codePoint >> 16)); + buffer[index++] = (byte)(0xFF & (codePoint >> 8)); + buffer[index++] = (byte)(0xFF & codePoint); + } + } + buffer[index++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR; + return index - origin; + } + + /** + * Writes a string with our character format to a ByteArrayOutputStream. + * + * This will also write the terminator byte. + * + * @param buffer the ByteArrayOutputStream to write to. + * @param word the string to write. + */ + static void writeString(final ByteArrayOutputStream buffer, final String word) { + final int length = word.length(); + for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) { + final int codePoint = word.codePointAt(i); + if (1 == getCharSize(codePoint)) { + buffer.write((byte) codePoint); + } else { + buffer.write((byte) (0xFF & (codePoint >> 16))); + buffer.write((byte) (0xFF & (codePoint >> 8))); + buffer.write((byte) (0xFF & codePoint)); + } + } + buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR); + } + + /** + * Reads a string from a DictBuffer. This is the converse of the above method. + */ + static String readString(final DictBuffer dictBuffer) { + final StringBuilder s = new StringBuilder(); + int character = readChar(dictBuffer); + while (character != FormatSpec.INVALID_CHARACTER) { + s.appendCodePoint(character); + character = readChar(dictBuffer); + } + return s.toString(); + } + + /** + * Reads a character from the buffer. + * + * This follows the character format documented earlier in this source file. 
+ * + * @param dictBuffer the buffer, positioned over an encoded character. + * @return the character code. + */ + static int readChar(final DictBuffer dictBuffer) { + int character = dictBuffer.readUnsignedByte(); + if (!fitsOnOneByte(character)) { + if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == character) { + return FormatSpec.INVALID_CHARACTER; + } + character <<= 16; + character += dictBuffer.readUnsignedShort(); + } + return character; + } + } + + // Input methods: Read a binary dictionary to memory. + // readDictionaryBinary is the public entry point for them. + + static int readChildrenAddress(final DictBuffer dictBuffer, + final int optionFlags, final FormatOptions options) { + if (options.mSupportsDynamicUpdate) { + final int address = dictBuffer.readUnsignedInt24(); + if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS; + if ((address & FormatSpec.MSB24) != 0) { + return -(address & FormatSpec.SINT24_MAX); + } else { + return address; + } + } + int address; + switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) { + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE: + return dictBuffer.readUnsignedByte(); + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES: + return dictBuffer.readUnsignedShort(); + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES: + return dictBuffer.readUnsignedInt24(); + case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS: + default: + return FormatSpec.NO_CHILDREN_ADDRESS; + } + } + + static int readParentAddress(final DictBuffer dictBuffer, + final FormatOptions formatOptions) { + if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { + final int parentAddress = dictBuffer.readUnsignedInt24(); + final int sign = ((parentAddress & FormatSpec.MSB24) != 0) ? 
-1 : 1; + return sign * (parentAddress & FormatSpec.SINT24_MAX); + } else { + return FormatSpec.NO_PARENT_ADDRESS; + } + } + + private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH]; + public static CharGroupInfo readCharGroup(final DictBuffer dictBuffer, + final int originalGroupAddress, final FormatOptions options) { + int addressPointer = originalGroupAddress; + final int flags = dictBuffer.readUnsignedByte(); + ++addressPointer; + + final int parentAddress = readParentAddress(dictBuffer, options); + if (BinaryDictIOUtils.supportsDynamicUpdate(options)) { + addressPointer += 3; + } + + final int characters[]; + if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) { + int index = 0; + int character = CharEncoding.readChar(dictBuffer); + addressPointer += CharEncoding.getCharSize(character); + while (-1 != character) { + // FusionDictionary is making sure that the length of the word is smaller than + // MAX_WORD_LENGTH. + // So we'll never write past the end of CHARACTER_BUFFER. 
+ CHARACTER_BUFFER[index++] = character; + character = CharEncoding.readChar(dictBuffer); + addressPointer += CharEncoding.getCharSize(character); + } + characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index); + } else { + final int character = CharEncoding.readChar(dictBuffer); + addressPointer += CharEncoding.getCharSize(character); + characters = new int[] { character }; + } + final int frequency; + if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) { + ++addressPointer; + frequency = dictBuffer.readUnsignedByte(); + } else { + frequency = CharGroup.NOT_A_TERMINAL; + } + int childrenAddress = readChildrenAddress(dictBuffer, flags, options); + if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { + childrenAddress += addressPointer; + } + addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options); + ArrayList shortcutTargets = null; + if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) { + final int pointerBefore = dictBuffer.position(); + shortcutTargets = new ArrayList(); + dictBuffer.readUnsignedShort(); // Skip the size + while (true) { + final int targetFlags = dictBuffer.readUnsignedByte(); + final String word = CharEncoding.readString(dictBuffer); + shortcutTargets.add(new WeightedString(word, + targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY)); + if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; + } + addressPointer += dictBuffer.position() - pointerBefore; + } + ArrayList bigrams = null; + if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) { + bigrams = new ArrayList(); + int bigramCount = 0; + while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { + final int bigramFlags = dictBuffer.readUnsignedByte(); + ++addressPointer; + final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE) + ? 
1 : -1; + int bigramAddress = addressPointer; + switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) { + case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: + bigramAddress += sign * dictBuffer.readUnsignedByte(); + addressPointer += 1; + break; + case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: + bigramAddress += sign * dictBuffer.readUnsignedShort(); + addressPointer += 2; + break; + case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: + final int offset = (dictBuffer.readUnsignedByte() << 16) + + dictBuffer.readUnsignedShort(); + bigramAddress += sign * offset; + addressPointer += 3; + break; + default: + throw new RuntimeException("Has bigrams with no address"); + } + bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY, + bigramAddress)); + if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break; + } + if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { + MakedictLog.d("too many bigrams in a group."); + } + } + return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency, + parentAddress, childrenAddress, shortcutTargets, bigrams); + } + + /** + * Reads and returns the char group count out of a buffer and forwards the pointer. + */ + public static int readCharGroupCount(final DictBuffer dictBuffer) { + final int msb = dictBuffer.readUnsignedByte(); + if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= msb) { + return msb; + } else { + return ((FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & msb) << 8) + + dictBuffer.readUnsignedByte(); + } + } + + /** + * Finds, as a string, the word at the address passed as an argument. + * + * @param dictBuffer the buffer to read from. + * @param headerSize the size of the header. + * @param address the address to seek. + * @param formatOptions file format options. + * @return the word with its frequency, as a weighted string. 
+ */ + /* package for tests */ static WeightedString getWordAtAddress( + final DictBuffer dictBuffer, final int headerSize, final int address, + final FormatOptions formatOptions) { + final WeightedString result; + final int originalPointer = dictBuffer.position(); + dictBuffer.position(address); + + if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { + result = getWordAtAddressWithParentAddress(dictBuffer, headerSize, address, + formatOptions); + } else { + result = getWordAtAddressWithoutParentAddress(dictBuffer, headerSize, address, + formatOptions); + } + + dictBuffer.position(originalPointer); + return result; + } + + @SuppressWarnings("unused") + private static WeightedString getWordAtAddressWithParentAddress( + final DictBuffer dictBuffer, final int headerSize, final int address, + final FormatOptions options) { + int currentAddress = address; + int frequency = Integer.MIN_VALUE; + final StringBuilder builder = new StringBuilder(); + // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH + for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) { + CharGroupInfo currentInfo; + int loopCounter = 0; + do { + dictBuffer.position(currentAddress + headerSize); + currentInfo = readCharGroup(dictBuffer, currentAddress, options); + if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) { + currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; + } + if (DBG && loopCounter++ > MAX_JUMPS) { + MakedictLog.d("Too many jumps - probably a bug"); + } + } while (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)); + if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency; + builder.insert(0, + new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length)); + if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break; + currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; + } + return new WeightedString(builder.toString(), 
frequency); + } + + private static WeightedString getWordAtAddressWithoutParentAddress( + final DictBuffer dictBuffer, final int headerSize, final int address, + final FormatOptions options) { + dictBuffer.position(headerSize); + final int count = readCharGroupCount(dictBuffer); + int groupOffset = BinaryDictIOUtils.getGroupCountSize(count); + final StringBuilder builder = new StringBuilder(); + WeightedString result = null; + + CharGroupInfo last = null; + for (int i = count - 1; i >= 0; --i) { + CharGroupInfo info = readCharGroup(dictBuffer, groupOffset, options); + groupOffset = info.mEndAddress; + if (info.mOriginalAddress == address) { + builder.append(new String(info.mCharacters, 0, info.mCharacters.length)); + result = new WeightedString(builder.toString(), info.mFrequency); + break; // and return + } + if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { + if (info.mChildrenAddress > address) { + if (null == last) continue; + builder.append(new String(last.mCharacters, 0, last.mCharacters.length)); + dictBuffer.position(last.mChildrenAddress + headerSize); + i = readCharGroupCount(dictBuffer); + groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i); + last = null; + continue; + } + last = info; + } + if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) { + builder.append(new String(last.mCharacters, 0, last.mCharacters.length)); + dictBuffer.position(last.mChildrenAddress + headerSize); + i = readCharGroupCount(dictBuffer); + groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i); + last = null; + continue; + } + } + return result; + } + + /** + * Reads a single node array from a buffer. + * + * This methods reads the file at the current position. A node array is fully expected to start + * at the current position. + * This will recursively read other node arrays into the structure, populating the reverse + * maps on the fly and using them to keep track of already read nodes. 
+ * + * @param dictBuffer the buffer, correctly positioned at the start of a node array. + * @param headerSize the size, in bytes, of the file header. + * @param reverseNodeArrayMap a mapping from addresses to already read node arrays. + * @param reverseGroupMap a mapping from addresses to already read character groups. + * @param options file format options. + * @return the read node array with all his children already read. + */ + private static PtNodeArray readNodeArray(final DictBuffer dictBuffer, + final int headerSize, final Map reverseNodeArrayMap, + final Map reverseGroupMap, final FormatOptions options) + throws IOException { + final ArrayList nodeArrayContents = new ArrayList(); + final int nodeArrayOrigin = dictBuffer.position() - headerSize; + + do { // Scan the linked-list node. + final int nodeArrayHeadPosition = dictBuffer.position() - headerSize; + final int count = readCharGroupCount(dictBuffer); + int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count); + for (int i = count; i > 0; --i) { // Scan the array of CharGroup. 
+ CharGroupInfo info = readCharGroup(dictBuffer, groupOffset, options); + if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue; + ArrayList shortcutTargets = info.mShortcutTargets; + ArrayList bigrams = null; + if (null != info.mBigrams) { + bigrams = new ArrayList(); + for (PendingAttribute bigram : info.mBigrams) { + final WeightedString word = getWordAtAddress( + dictBuffer, headerSize, bigram.mAddress, options); + final int reconstructedFrequency = + BinaryDictIOUtils.reconstructBigramFrequency(word.mFrequency, + bigram.mFrequency); + bigrams.add(new WeightedString(word.mWord, reconstructedFrequency)); + } + } + if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) { + PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress); + if (null == children) { + final int currentPosition = dictBuffer.position(); + dictBuffer.position(info.mChildrenAddress + headerSize); + children = readNodeArray(dictBuffer, headerSize, reverseNodeArrayMap, + reverseGroupMap, options); + dictBuffer.position(currentPosition); + } + nodeArrayContents.add( + new CharGroup(info.mCharacters, shortcutTargets, bigrams, + info.mFrequency, + 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), + 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children)); + } else { + nodeArrayContents.add( + new CharGroup(info.mCharacters, shortcutTargets, bigrams, + info.mFrequency, + 0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD), + 0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED))); + } + groupOffset = info.mEndAddress; + } + + // reach the end of the array. 
+ if (options.mSupportsDynamicUpdate) { + final int nextAddress = dictBuffer.readUnsignedInt24(); + if (nextAddress >= 0 && nextAddress < dictBuffer.limit()) { + dictBuffer.position(nextAddress); + } else { + break; + } + } + } while (options.mSupportsDynamicUpdate && + dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS); + + final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents); + nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin; + nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin; + reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray); + return nodeArray; + } + + /** + * Helper function to get the binary format version from the header. + * @throws IOException + */ + private static int getFormatVersion(final DictBuffer dictBuffer) + throws IOException { + final int magic = dictBuffer.readInt(); + if (FormatSpec.MAGIC_NUMBER == magic) return dictBuffer.readUnsignedShort(); + return FormatSpec.NOT_A_VERSION_NUMBER; + } + + /** + * Helper function to get and validate the binary format version. + * @throws UnsupportedFormatException + * @throws IOException + */ + static int checkFormatVersion(final DictBuffer dictBuffer) + throws IOException, UnsupportedFormatException { + final int version = getFormatVersion(dictBuffer); + if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION + || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) { + throw new UnsupportedFormatException("This file has version " + version + + ", but this implementation does not support versions above " + + FormatSpec.MAXIMUM_SUPPORTED_VERSION); + } + return version; + } + + /** + * Reads a header from a buffer. 
+ * @param headerReader the header reader + * @throws IOException + * @throws UnsupportedFormatException + */ + public static FileHeader readHeader(final HeaderReader headerReader) + throws IOException, UnsupportedFormatException { + final int version = headerReader.readVersion(); + final int optionsFlags = headerReader.readOptionFlags(); + + final int headerSize = headerReader.readHeaderSize(); + + if (headerSize < 0) { + throw new UnsupportedFormatException("header size can't be negative."); + } + + final HashMap attributes = headerReader.readAttributes(headerSize); + + final FileHeader header = new FileHeader(headerSize, + new FusionDictionary.DictionaryOptions(attributes, + 0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG), + 0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)), + new FormatOptions(version, + 0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE))); + return header; + } + + /** + * Reads options from a buffer and populate a map with their contents. + * + * The buffer is read at the current position, so the caller must take care the pointer + * is in the right place before calling this. + */ + public static void populateOptions(final DictBuffer dictBuffer, + final int headerSize, final HashMap options) { + while (dictBuffer.position() < headerSize) { + final String key = CharEncoding.readString(dictBuffer); + final String value = CharEncoding.readString(dictBuffer); + options.put(key, value); + } + } + + /** + * Reads a buffer and returns the memory representation of the dictionary. + * + * This high-level method takes a buffer and reads its contents, populating a + * FusionDictionary structure. The optional dict argument is an existing dictionary to + * which words from the buffer should be added. If it is null, a new dictionary is created. + * + * @param dictDecoder the dict decoder. + * @param dict an optional dictionary to add words to, or null. + * @return the created (or merged) dictionary. 
+ */ + @UsedForTesting + public static FusionDictionary readDictionaryBinary(final BinaryDictDecoder dictDecoder, + final FusionDictionary dict) throws FileNotFoundException, IOException, + UnsupportedFormatException { + + // if the buffer has not been opened, open the buffer with bytebuffer. + if (dictDecoder.getDictBuffer() == null) dictDecoder.openDictBuffer( + new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory()); + if (dictDecoder.getDictBuffer() == null) { + MakedictLog.e("Cannot open the buffer"); + } + + // Read header + final FileHeader fileHeader = readHeader(dictDecoder); + + Map reverseNodeArrayMapping = new TreeMap(); + Map reverseGroupMapping = new TreeMap(); + final PtNodeArray root = readNodeArray(dictDecoder.getDictBuffer(), fileHeader.mHeaderSize, + reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions); + + FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions); + if (null != dict) { + for (final Word w : dict) { + if (w.mIsBlacklistEntry) { + newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord); + } else { + newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord); + } + } + for (final Word w : dict) { + // By construction a binary dictionary may not have bigrams pointing to + // words that are not also registered as unigrams so we don't have to avoid + // them explicitly here. + for (final WeightedString bigram : w.mBigrams) { + newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency); + } + } + } + + return newDict; + } + + /** + * Helper method to pass a file name instead of a File object to isBinaryDictionary. + */ + public static boolean isBinaryDictionary(final String filename) { + final File file = new File(filename); + return isBinaryDictionary(file); + } + + /** + * Basic test to find out whether the file is a binary dictionary or not. + * + * Concretely this only tests the magic number. + * + * @param file The file to test. 
+ * @return true if it's a binary dictionary, false otherwise + */ + public static boolean isBinaryDictionary(final File file) { + FileInputStream inStream = null; + try { + inStream = new FileInputStream(file); + final ByteBuffer buffer = inStream.getChannel().map( + FileChannel.MapMode.READ_ONLY, 0, file.length()); + final int version = getFormatVersion(new ByteBufferDictBuffer(buffer)); + return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION + && version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION); + } catch (FileNotFoundException e) { + return false; + } catch (IOException e) { + return false; + } finally { + if (inStream != null) { + try { + inStream.close(); + } catch (IOException e) { + // do nothing + } + } + } + } +} diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java index d9005b926..ff11cde39 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictEncoder.java @@ -16,7 +16,7 @@ package com.android.inputmethod.latin.makedict; -import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java index e5735aaca..4cf72e915 100644 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtils.java @@ -18,13 +18,13 @@ package com.android.inputmethod.latin.makedict; import 
com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; -import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding; -import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; -import com.android.inputmethod.latin.utils.ByteArrayWrapper; +import com.android.inputmethod.latin.utils.ByteArrayDictBuffer; import java.io.File; import java.io.FileInputStream; @@ -62,7 +62,7 @@ public final class BinaryDictIOUtils { * Retrieves all node arrays without recursive call. 
*/ private static void readUnigramsAndBigramsBinaryInner( - final FusionDictionaryBufferInterface buffer, final int headerSize, + final DictBuffer dictBuffer, final int headerSize, final Map words, final Map frequencies, final Map> bigrams, final FormatOptions formatOptions) { @@ -82,11 +82,11 @@ public final class BinaryDictIOUtils { p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength); } - if (buffer.position() != p.mAddress) buffer.position(p.mAddress); + if (dictBuffer.position() != p.mAddress) dictBuffer.position(p.mAddress); if (index != p.mLength) index = p.mLength; if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) { - p.mNumOfCharGroup = BinaryDictDecoder.readCharGroupCount(buffer); + p.mNumOfCharGroup = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer); p.mAddress += getGroupCountSize(p.mNumOfCharGroup); p.mPosition = 0; } @@ -94,7 +94,7 @@ public final class BinaryDictIOUtils { stack.pop(); continue; } - CharGroupInfo info = BinaryDictDecoder.readCharGroup(buffer, + CharGroupInfo info = BinaryDictDecoderUtils.readCharGroup(dictBuffer, p.mAddress - headerSize, formatOptions); for (int i = 0; i < info.mCharacters.length; ++i) { pushedChars[index++] = info.mCharacters[i]; @@ -114,7 +114,7 @@ public final class BinaryDictIOUtils { if (p.mPosition == p.mNumOfCharGroup) { if (formatOptions.mSupportsDynamicUpdate) { - final int forwardLinkAddress = buffer.readUnsignedInt24(); + final int forwardLinkAddress = dictBuffer.readUnsignedInt24(); if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) { // The node array has a forward link. p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT; @@ -127,7 +127,7 @@ public final class BinaryDictIOUtils { } } else { // The node array has more groups. 
- p.mAddress = buffer.position(); + p.mAddress = dictBuffer.position(); } if (!isMovedGroup && hasChildrenAddress(info.mChildrenAddress)) { @@ -141,20 +141,20 @@ public final class BinaryDictIOUtils { * Reads unigrams and bigrams from the binary file. * Doesn't store a full memory representation of the dictionary. * - * @param dictReader the dict reader. + * @param dictDecoder the dict decoder. * @param words the map to store the address as a key and the word as a value. * @param frequencies the map to store the address as a key and the frequency as a value. * @param bigrams the map to store the address as a key and the list of address as a value. * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. */ - public static void readUnigramsAndBigramsBinary(final BinaryDictReader dictReader, + public static void readUnigramsAndBigramsBinary(final BinaryDictDecoder dictDecoder, final Map words, final Map frequencies, final Map> bigrams) throws IOException, UnsupportedFormatException { // Read header - final FileHeader header = BinaryDictDecoder.readHeader(dictReader); - readUnigramsAndBigramsBinaryInner(dictReader.getBuffer(), header.mHeaderSize, words, + final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder); + readUnigramsAndBigramsBinaryInner(dictDecoder.getDictBuffer(), header.mHeaderSize, words, frequencies, bigrams, header.mFormatOptions); } @@ -162,32 +162,32 @@ public final class BinaryDictIOUtils { * Gets the address of the last CharGroup of the exact matching word in the dictionary. * If no match is found, returns NOT_VALID_WORD. * - * @param dictReader the dict reader. + * @param dictDecoder the dict decoder. * @param word the word we search for. * @return the address of the terminal node. * @throws IOException if the file can't be read. * @throws UnsupportedFormatException if the format of the file is not recognized. 
*/ @UsedForTesting - public static int getTerminalPosition(final BinaryDictReader dictReader, + public static int getTerminalPosition(final BinaryDictDecoder dictDecoder, final String word) throws IOException, UnsupportedFormatException { - final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); + final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); if (word == null) return FormatSpec.NOT_VALID_WORD; - if (buffer.position() != 0) buffer.position(0); + if (dictBuffer.position() != 0) dictBuffer.position(0); - final FileHeader header = BinaryDictDecoder.readHeader(dictReader); + final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder); int wordPos = 0; final int wordLen = word.codePointCount(0, word.length()); for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) { if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD; do { - final int charGroupCount = BinaryDictDecoder.readCharGroupCount(buffer); + final int charGroupCount = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer); boolean foundNextCharGroup = false; for (int i = 0; i < charGroupCount; ++i) { - final int charGroupPos = buffer.position(); - final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer, - buffer.position(), header.mFormatOptions); + final int charGroupPos = dictBuffer.position(); + final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup( + dictBuffer, dictBuffer.position(), header.mFormatOptions); final boolean isMovedGroup = isMovedGroup(currentInfo.mFlags, header.mFormatOptions); final boolean isDeletedGroup = isDeletedGroup(currentInfo.mFlags, @@ -219,7 +219,7 @@ public final class BinaryDictIOUtils { return FormatSpec.NOT_VALID_WORD; } foundNextCharGroup = true; - buffer.position(currentInfo.mChildrenAddress); + dictBuffer.position(currentInfo.mChildrenAddress); break; } } @@ -233,11 +233,11 @@ public final class BinaryDictIOUtils { return FormatSpec.NOT_VALID_WORD; } - final int forwardLinkAddress 
= buffer.readUnsignedInt24(); + final int forwardLinkAddress = dictBuffer.readUnsignedInt24(); if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) { return FormatSpec.NOT_VALID_WORD; } - buffer.position(forwardLinkAddress); + dictBuffer.position(forwardLinkAddress); } while(true); } return FormatSpec.NOT_VALID_WORD; @@ -246,12 +246,12 @@ public final class BinaryDictIOUtils { /** * @return the size written, in bytes. Always 3 bytes. */ - static int writeSInt24ToBuffer(final FusionDictionaryBufferInterface buffer, + static int writeSInt24ToBuffer(final DictBuffer dictBuffer, final int value) { final int absValue = Math.abs(value); - buffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF)); - buffer.put((byte)((absValue >> 8) & 0xFF)); - buffer.put((byte)(absValue & 0xFF)); + dictBuffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF)); + dictBuffer.put((byte)((absValue >> 8) & 0xFF)); + dictBuffer.put((byte)(absValue & 0xFF)); return 3; } @@ -289,31 +289,31 @@ public final class BinaryDictIOUtils { return BinaryDictEncoder.getByteSize(value); } - static void skipCharGroup(final FusionDictionaryBufferInterface buffer, + static void skipCharGroup(final DictBuffer dictBuffer, final FormatOptions formatOptions) { - final int flags = buffer.readUnsignedByte(); - BinaryDictDecoder.readParentAddress(buffer, formatOptions); - skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); - BinaryDictDecoder.readChildrenAddress(buffer, flags, formatOptions); - if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte(); + final int flags = dictBuffer.readUnsignedByte(); + BinaryDictDecoderUtils.readParentAddress(dictBuffer, formatOptions); + skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); + BinaryDictDecoderUtils.readChildrenAddress(dictBuffer, flags, formatOptions); + if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) dictBuffer.readUnsignedByte(); if ((flags & 
FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) { - final int shortcutsSize = buffer.readUnsignedShort(); - buffer.position(buffer.position() + shortcutsSize + final int shortcutsSize = dictBuffer.readUnsignedShort(); + dictBuffer.position(dictBuffer.position() + shortcutsSize - FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE); } if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) { int bigramCount = 0; while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { - final int bigramFlags = buffer.readUnsignedByte(); + final int bigramFlags = dictBuffer.readUnsignedByte(); switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) { case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: - buffer.readUnsignedByte(); + dictBuffer.readUnsignedByte(); break; case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: - buffer.readUnsignedShort(); + dictBuffer.readUnsignedShort(); break; case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: - buffer.readUnsignedInt24(); + dictBuffer.readUnsignedInt24(); break; } if ((bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT) == 0) break; @@ -324,15 +324,15 @@ public final class BinaryDictIOUtils { } } - static void skipString(final FusionDictionaryBufferInterface buffer, + static void skipString(final DictBuffer dictBuffer, final boolean hasMultipleChars) { if (hasMultipleChars) { - int character = CharEncoding.readChar(buffer); + int character = CharEncoding.readChar(dictBuffer); while (character != FormatSpec.INVALID_CHARACTER) { - character = CharEncoding.readChar(buffer); + character = CharEncoding.readChar(dictBuffer); } } else { - CharEncoding.readChar(buffer); + CharEncoding.readChar(dictBuffer); } } @@ -508,24 +508,25 @@ public final class BinaryDictIOUtils { } /** - * Find a word using the BinaryDictReader. + * Find a word using the BinaryDictDecoder. 
* - * @param dictReader the dict reader + * @param dictDecoder the dict reader * @param word the word searched * @return the found group * @throws IOException * @throws UnsupportedFormatException */ @UsedForTesting - public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader dictReader, + public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictDecoder dictDecoder, final String word) throws IOException, UnsupportedFormatException { - int position = getTerminalPosition(dictReader, word); - final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); + int position = getTerminalPosition(dictDecoder, word); + final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); if (position != FormatSpec.NOT_VALID_WORD) { - buffer.position(0); - final FileHeader header = BinaryDictDecoder.readHeader(dictReader); - buffer.position(position); - return BinaryDictDecoder.readCharGroup(buffer, position, header.mFormatOptions); + dictBuffer.position(0); + final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder); + dictBuffer.position(position); + return BinaryDictDecoderUtils.readCharGroup(dictBuffer, position, + header.mFormatOptions); } return null; } @@ -544,21 +545,21 @@ public final class BinaryDictIOUtils { final File file, final long offset, final long length) throws FileNotFoundException, IOException, UnsupportedFormatException { final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE]; - final BinaryDictReader dictReader = new BinaryDictReader(file); - dictReader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFactory() { + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); + dictDecoder.openDictBuffer(new BinaryDictDecoder.DictionaryBufferFactory() { @Override - public FusionDictionaryBufferInterface getFusionDictionaryBuffer(File file) + public DictBuffer getDictionaryBuffer(File file) throws FileNotFoundException, IOException { final FileInputStream inStream = new FileInputStream(file); try 
{ inStream.read(buffer); - return new ByteArrayWrapper(buffer); + return new ByteArrayDictBuffer(buffer); } finally { inStream.close(); } } }); - return BinaryDictDecoder.readHeader(dictReader); + return BinaryDictDecoderUtils.readHeader(dictDecoder); } public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset, @@ -636,4 +637,19 @@ public final class BinaryDictIOUtils { return 0; } } + + /** + * Calculate bigram frequency from compressed value + * + * @param unigramFrequency + * @param bigramFrequency compressed frequency + * @return approximate bigram frequency + */ + public static int reconstructBigramFrequency(final int unigramFrequency, + final int bigramFrequency) { + final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency) + / (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY); + final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f); + return (int)resultFreqFloat; + } } diff --git a/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java b/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java deleted file mode 100644 index 6d3b31a28..000000000 --- a/java/src/com/android/inputmethod/latin/makedict/BinaryDictReader.java +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (C) 2013 The Android Open Source Project - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package com.android.inputmethod.latin.makedict; - -import com.android.inputmethod.annotations.UsedForTesting; -import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding; -import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; -import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface; -import com.android.inputmethod.latin.utils.ByteArrayWrapper; - -import java.io.File; -import java.io.FileInputStream; -import java.io.FileNotFoundException; -import java.io.IOException; -import java.io.RandomAccessFile; -import java.nio.ByteBuffer; -import java.nio.channels.FileChannel; -import java.util.HashMap; - -public class BinaryDictReader implements HeaderReaderInterface { - - public interface FusionDictionaryBufferFactory { - public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file) - throws FileNotFoundException, IOException; - } - - /** - * Creates FusionDictionaryBuffer from a ByteBuffer - */ - public static final class FusionDictionaryBufferFromByteBufferFactory - implements FusionDictionaryBufferFactory { - @Override - public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file) - throws FileNotFoundException, IOException { - FileInputStream inStream = null; - ByteBuffer buffer = null; - try { - inStream = new FileInputStream(file); - buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY, - 0, file.length()); - } finally { - if (inStream != null) { - inStream.close(); - } - } - if (buffer != null) { - return new BinaryDictDecoder.ByteBufferWrapper(buffer); - } - return null; - } - } - - /** - * Creates FusionDictionaryBuffer from a byte array - */ - public static final class FusionDictionaryBufferFromByteArrayFactory - implements FusionDictionaryBufferFactory { - @Override - public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file) - throws FileNotFoundException, IOException { - FileInputStream inStream = 
null; - try { - inStream = new FileInputStream(file); - final byte[] array = new byte[(int) file.length()]; - inStream.read(array); - return new ByteArrayWrapper(array); - } finally { - if (inStream != null) { - inStream.close(); - } - } - } - } - - /** - * Creates FusionDictionaryBuffer from a RandomAccessFile. - */ - @UsedForTesting - public static final class FusionDictionaryBufferFromWritableByteBufferFactory - implements FusionDictionaryBufferFactory { - @Override - public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file) - throws FileNotFoundException, IOException { - RandomAccessFile raFile = null; - ByteBuffer buffer = null; - try { - raFile = new RandomAccessFile(file, "rw"); - buffer = raFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, file.length()); - } finally { - if (raFile != null) { - raFile.close(); - } - } - if (buffer != null) { - return new BinaryDictDecoder.ByteBufferWrapper(buffer); - } - return null; - } - } - - private final File mDictionaryBinaryFile; - private FusionDictionaryBufferInterface mFusionDictionaryBuffer; - - public BinaryDictReader(final File file) { - mDictionaryBinaryFile = file; - mFusionDictionaryBuffer = null; - } - - public void openBuffer(final FusionDictionaryBufferFactory factory) - throws FileNotFoundException, IOException { - mFusionDictionaryBuffer = factory.getFusionDictionaryBuffer(mDictionaryBinaryFile); - } - - public FusionDictionaryBufferInterface getBuffer() { - return mFusionDictionaryBuffer; - } - - @UsedForTesting - public FusionDictionaryBufferInterface openAndGetBuffer( - final FusionDictionaryBufferFactory factory) - throws FileNotFoundException, IOException { - openBuffer(factory); - return getBuffer(); - } - - // The implementation of HeaderReaderInterface - @Override - public int readVersion() throws IOException, UnsupportedFormatException { - return BinaryDictDecoder.checkFormatVersion(mFusionDictionaryBuffer); - } - - @Override - public int readOptionFlags() { - 
return mFusionDictionaryBuffer.readUnsignedShort(); - } - - @Override - public int readHeaderSize() { - return mFusionDictionaryBuffer.readInt(); - } - - @Override - public HashMap readAttributes(final int headerSize) { - final HashMap attributes = new HashMap(); - while (mFusionDictionaryBuffer.position() < headerSize) { - // We can avoid infinite loop here since mFusionDictonary.position() is always increased - // by calling CharEncoding.readString. - final String key = CharEncoding.readString(mFusionDictionaryBuffer); - final String value = CharEncoding.readString(mFusionDictionaryBuffer); - attributes.put(key, value); - } - mFusionDictionaryBuffer.position(headerSize); - return attributes; - } -} diff --git a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java index 584b793fb..5361d2eba 100644 --- a/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/makedict/DynamicBinaryDictIOUtils.java @@ -18,7 +18,7 @@ package com.android.inputmethod.latin.makedict; import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.Constants; -import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; @@ -49,142 +49,146 @@ public final class DynamicBinaryDictIOUtils { /** * Delete the word from the binary file. * - * @param dictReader the dict reader. + * @param dictDecoder the dict decoder. 
* @param word the word we delete * @throws IOException * @throws UnsupportedFormatException */ @UsedForTesting - public static void deleteWord(final BinaryDictReader dictReader, final String word) + public static void deleteWord(final BinaryDictDecoder dictDecoder, final String word) throws IOException, UnsupportedFormatException { - final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); - buffer.position(0); - final FileHeader header = BinaryDictDecoder.readHeader(dictReader); - final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictReader, word); + final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); + dictBuffer.position(0); + final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder); + final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word); if (wordPosition == FormatSpec.NOT_VALID_WORD) return; - buffer.position(wordPosition); - final int flags = buffer.readUnsignedByte(); - buffer.position(wordPosition); - buffer.put((byte)markAsDeleted(flags)); + dictBuffer.position(wordPosition); + final int flags = dictBuffer.readUnsignedByte(); + dictBuffer.position(wordPosition); + dictBuffer.put((byte)markAsDeleted(flags)); } /** * Update a parent address in a CharGroup that is referred to by groupOriginAddress. * - * @param buffer the buffer to write. + * @param dictBuffer the DictBuffer to write. * @param groupOriginAddress the address of the group. * @param newParentAddress the absolute address of the parent. * @param formatOptions file format options. 
*/ - public static void updateParentAddress(final FusionDictionaryBufferInterface buffer, + public static void updateParentAddress(final DictBuffer dictBuffer, final int groupOriginAddress, final int newParentAddress, final FormatOptions formatOptions) { - final int originalPosition = buffer.position(); - buffer.position(groupOriginAddress); + final int originalPosition = dictBuffer.position(); + dictBuffer.position(groupOriginAddress); if (!formatOptions.mSupportsDynamicUpdate) { throw new RuntimeException("this file format does not support parent addresses"); } - final int flags = buffer.readUnsignedByte(); + final int flags = dictBuffer.readUnsignedByte(); if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) { // If the group is moved, the parent address is stored in the destination group. // We are guaranteed to process the destination group later, so there is no need to // update anything here. - buffer.position(originalPosition); + dictBuffer.position(originalPosition); return; } if (DBG) { MakedictLog.d("update parent address flags=" + flags + ", " + groupOriginAddress); } final int parentOffset = newParentAddress - groupOriginAddress; - BinaryDictIOUtils.writeSInt24ToBuffer(buffer, parentOffset); - buffer.position(originalPosition); + BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, parentOffset); + dictBuffer.position(originalPosition); } /** * Update parent addresses in a node array stored at nodeOriginAddress. * - * @param buffer the buffer to be modified. + * @param dictBuffer the DictBuffer to be modified. * @param nodeOriginAddress the address of the node array to update. * @param newParentAddress the address to be written. * @param formatOptions file format options. 
*/ - public static void updateParentAddresses(final FusionDictionaryBufferInterface buffer, + public static void updateParentAddresses(final DictBuffer dictBuffer, final int nodeOriginAddress, final int newParentAddress, final FormatOptions formatOptions) { - final int originalPosition = buffer.position(); - buffer.position(nodeOriginAddress); + final int originalPosition = dictBuffer.position(); + dictBuffer.position(nodeOriginAddress); do { - final int count = BinaryDictDecoder.readCharGroupCount(buffer); + final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer); for (int i = 0; i < count; ++i) { - updateParentAddress(buffer, buffer.position(), newParentAddress, formatOptions); - BinaryDictIOUtils.skipCharGroup(buffer, formatOptions); + updateParentAddress(dictBuffer, dictBuffer.position(), newParentAddress, + formatOptions); + BinaryDictIOUtils.skipCharGroup(dictBuffer, formatOptions); } - final int forwardLinkAddress = buffer.readUnsignedInt24(); - buffer.position(forwardLinkAddress); + final int forwardLinkAddress = dictBuffer.readUnsignedInt24(); + dictBuffer.position(forwardLinkAddress); } while (formatOptions.mSupportsDynamicUpdate - && buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS); - buffer.position(originalPosition); + && dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS); + dictBuffer.position(originalPosition); } /** * Update a children address in a CharGroup that is addressed by groupOriginAddress. * - * @param buffer the buffer to write. + * @param dictBuffer the DictBuffer to write. * @param groupOriginAddress the address of the group. * @param newChildrenAddress the absolute address of the child. * @param formatOptions file format options. 
*/ - public static void updateChildrenAddress(final FusionDictionaryBufferInterface buffer, + public static void updateChildrenAddress(final DictBuffer dictBuffer, final int groupOriginAddress, final int newChildrenAddress, final FormatOptions formatOptions) { - final int originalPosition = buffer.position(); - buffer.position(groupOriginAddress); - final int flags = buffer.readUnsignedByte(); - final int parentAddress = BinaryDictDecoder.readParentAddress(buffer, formatOptions); - BinaryDictIOUtils.skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); - if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte(); + final int originalPosition = dictBuffer.position(); + dictBuffer.position(groupOriginAddress); + final int flags = dictBuffer.readUnsignedByte(); + final int parentAddress = BinaryDictDecoderUtils.readParentAddress(dictBuffer, + formatOptions); + BinaryDictIOUtils.skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); + if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) dictBuffer.readUnsignedByte(); final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS - ? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - buffer.position(); - BinaryDictIOUtils.writeSInt24ToBuffer(buffer, childrenOffset); - buffer.position(originalPosition); + ? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - dictBuffer.position(); + BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, childrenOffset); + dictBuffer.position(originalPosition); } /** * Helper method to move a char group to the tail of the file. 
*/ private static int moveCharGroup(final OutputStream destination, - final FusionDictionaryBufferInterface buffer, final CharGroupInfo info, + final DictBuffer dictBuffer, final CharGroupInfo info, final int nodeArrayOriginAddress, final int oldGroupAddress, final FormatOptions formatOptions) throws IOException { - updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions); - buffer.position(oldGroupAddress); - final int currentFlags = buffer.readUnsignedByte(); - buffer.position(oldGroupAddress); - buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags + updateParentAddress(dictBuffer, oldGroupAddress, dictBuffer.limit() + 1, formatOptions); + dictBuffer.position(oldGroupAddress); + final int currentFlags = dictBuffer.readUnsignedByte(); + dictBuffer.position(oldGroupAddress); + dictBuffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG)))); int size = FormatSpec.GROUP_FLAGS_SIZE; - updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions); + updateForwardLink(dictBuffer, nodeArrayOriginAddress, dictBuffer.limit(), formatOptions); size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info }); return size; } @SuppressWarnings("unused") - private static void updateForwardLink(final FusionDictionaryBufferInterface buffer, + private static void updateForwardLink(final DictBuffer dictBuffer, final int nodeArrayOriginAddress, final int newNodeArrayAddress, final FormatOptions formatOptions) { - buffer.position(nodeArrayOriginAddress); + dictBuffer.position(nodeArrayOriginAddress); int jumpCount = 0; while (jumpCount++ < MAX_JUMPS) { - final int count = BinaryDictDecoder.readCharGroupCount(buffer); - for (int i = 0; i < count; ++i) BinaryDictIOUtils.skipCharGroup(buffer, formatOptions); - final int forwardLinkAddress = buffer.readUnsignedInt24(); + final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer); + for (int i = 0; i < count; ++i) { + 
BinaryDictIOUtils.skipCharGroup(dictBuffer, formatOptions); + } + final int forwardLinkAddress = dictBuffer.readUnsignedInt24(); if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) { - buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE); - BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress); + dictBuffer.position(dictBuffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE); + BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeArrayAddress); return; } - buffer.position(forwardLinkAddress); + dictBuffer.position(forwardLinkAddress); } if (DBG && jumpCount >= MAX_JUMPS) { throw new RuntimeException("too many jumps, probably a bug."); @@ -204,7 +208,7 @@ public final class DynamicBinaryDictIOUtils { * @param shortcutTargets the shortcut targets for this group. * @param bigrams the bigrams for this group. * @param destination the stream representing the tail of the file. - * @param buffer the buffer representing the (constant-size) body of the file. + * @param dictBuffer the DictBuffer representing the (constant-size) body of the file. * @param oldNodeArrayOrigin the origin of the old node array this group was a part of. * @param oldGroupOrigin the old origin where this group used to be stored. * @param formatOptions format options for this dictionary. 
@@ -215,7 +219,7 @@ public final class DynamicBinaryDictIOUtils { final int length, final int flags, final int frequency, final int parentAddress, final ArrayList shortcutTargets, final ArrayList bigrams, final OutputStream destination, - final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin, + final DictBuffer dictBuffer, final int oldNodeArrayOrigin, final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException { int size = 0; final int newGroupOrigin = fileEndAddress + 1; @@ -228,7 +232,7 @@ public final class DynamicBinaryDictIOUtils { flags, writtenCharacters, frequency, parentAddress, fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets, bigrams); - moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin, + moveCharGroup(destination, dictBuffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin, formatOptions); return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; } @@ -236,7 +240,7 @@ public final class DynamicBinaryDictIOUtils { /** * Insert a word into a binary dictionary. * - * @param dictReader the dict reader. + * @param dictDecoder the dict decoder. * @param destination a stream to the underlying file, with the pointer at the end of the file. * @param word the word to insert. * @param frequency the frequency of the new word. @@ -249,16 +253,17 @@ public final class DynamicBinaryDictIOUtils { // TODO: Support batch insertion. // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary. 
@UsedForTesting - public static void insertWord(final BinaryDictReader dictReader, final OutputStream destination, - final String word, final int frequency, final ArrayList bigramStrings, + public static void insertWord(final BinaryDictDecoder dictDecoder, + final OutputStream destination, final String word, final int frequency, + final ArrayList bigramStrings, final ArrayList shortcuts, final boolean isNotAWord, final boolean isBlackListEntry) throws IOException, UnsupportedFormatException { final ArrayList bigrams = new ArrayList(); - final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); + final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); if (bigramStrings != null) { for (final WeightedString bigram : bigramStrings) { - int position = BinaryDictIOUtils.getTerminalPosition(dictReader, bigram.mWord); + int position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, bigram.mWord); if (position == FormatSpec.NOT_VALID_WORD) { // TODO: figure out what is the correct thing to do here. } else { @@ -272,24 +277,24 @@ public final class DynamicBinaryDictIOUtils { final boolean hasShortcuts = shortcuts != null && !shortcuts.isEmpty(); // find the insert position of the word. 
- if (buffer.position() != 0) buffer.position(0); - final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader); + if (dictBuffer.position() != 0) dictBuffer.position(0); + final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder); - int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position(); + int wordPos = 0, address = dictBuffer.position(), nodeOriginAddress = dictBuffer.position(); final int[] codePoints = FusionDictionary.getCodePoints(word); final int wordLen = codePoints.length; for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) { if (wordPos >= wordLen) break; - nodeOriginAddress = buffer.position(); + nodeOriginAddress = dictBuffer.position(); int nodeParentAddress = -1; - final int charGroupCount = BinaryDictDecoder.readCharGroupCount(buffer); + final int charGroupCount = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer); boolean foundNextGroup = false; for (int i = 0; i < charGroupCount; ++i) { - address = buffer.position(); - final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer, - buffer.position(), fileHeader.mFormatOptions); + address = dictBuffer.position(); + final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(dictBuffer, + dictBuffer.position(), fileHeader.mFormatOptions); final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, fileHeader.mFormatOptions); if (isMovedGroup) continue; @@ -308,18 +313,18 @@ public final class DynamicBinaryDictIOUtils { * after * abc - d - ef */ - final int newNodeAddress = buffer.limit(); + final int newNodeAddress = dictBuffer.limit(); final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1, isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */, false /* isBlackListEntry */, fileHeader.mFormatOptions); int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags, frequency, nodeParentAddress, shortcuts, bigrams, destination, - buffer, 
nodeOriginAddress, address, fileHeader.mFormatOptions); + dictBuffer, nodeOriginAddress, address, fileHeader.mFormatOptions); final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p, currentInfo.mCharacters.length); if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { - updateParentAddresses(buffer, currentInfo.mChildrenAddress, + updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress, newNodeAddress + written + 1, fileHeader.mFormatOptions); } final CharGroupInfo newInfo2 = new CharGroupInfo( @@ -344,7 +349,7 @@ public final class DynamicBinaryDictIOUtils { * - c */ - final int newNodeAddress = buffer.limit(); + final int newNodeAddress = dictBuffer.limit(); final int childrenAddress = currentInfo.mChildrenAddress; // move prefix @@ -355,13 +360,13 @@ public final class DynamicBinaryDictIOUtils { fileHeader.mFormatOptions); int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, prefixFlags, -1 /* frequency */, nodeParentAddress, null, null, - destination, buffer, nodeOriginAddress, address, + destination, dictBuffer, nodeOriginAddress, address, fileHeader.mFormatOptions); final int[] suffixCharacters = Arrays.copyOfRange( currentInfo.mCharacters, p, currentInfo.mCharacters.length); if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { - updateParentAddresses(buffer, currentInfo.mChildrenAddress, + updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress, newNodeAddress + written + 1, fileHeader.mFormatOptions); } final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags( @@ -403,7 +408,7 @@ public final class DynamicBinaryDictIOUtils { if (wordPos + currentInfo.mCharacters.length == wordLen) { // the word exists in the dictionary. // only update group. 
- final int newNodeAddress = buffer.limit(); + final int newNodeAddress = dictBuffer.limit(); final boolean hasMultipleChars = currentInfo.mCharacters.length > 1; final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, @@ -412,7 +417,7 @@ public final class DynamicBinaryDictIOUtils { -1 /* endAddress */, flags, currentInfo.mCharacters, frequency, nodeParentAddress, currentInfo.mChildrenAddress, shortcuts, bigrams); - moveCharGroup(destination, buffer, newInfo, nodeOriginAddress, address, + moveCharGroup(destination, dictBuffer, newInfo, nodeOriginAddress, address, fileHeader.mFormatOptions); return; } @@ -430,8 +435,8 @@ public final class DynamicBinaryDictIOUtils { * after * ab - cd - e */ - final int newNodeAddress = buffer.limit(); - updateChildrenAddress(buffer, address, newNodeAddress, + final int newNodeAddress = dictBuffer.limit(); + updateChildrenAddress(dictBuffer, address, newNodeAddress, fileHeader.mFormatOptions); final int newGroupAddress = newNodeAddress + 1; final boolean hasMultipleChars = (wordLen - wordPos) > 1; @@ -445,7 +450,7 @@ public final class DynamicBinaryDictIOUtils { BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo }); return; } - buffer.position(currentInfo.mChildrenAddress); + dictBuffer.position(currentInfo.mChildrenAddress); foundNextGroup = true; break; } @@ -454,8 +459,8 @@ public final class DynamicBinaryDictIOUtils { if (foundNextGroup) continue; // reached the end of the array. - final int linkAddressPosition = buffer.position(); - int nextLink = buffer.readUnsignedInt24(); + final int linkAddressPosition = dictBuffer.position(); + int nextLink = dictBuffer.readUnsignedInt24(); if ((nextLink & FormatSpec.MSB24) != 0) { nextLink = -(nextLink & FormatSpec.SINT24_MAX); } @@ -475,9 +480,9 @@ public final class DynamicBinaryDictIOUtils { */ // change the forward link address. 
- final int newNodeAddress = buffer.limit(); - buffer.position(linkAddressPosition); - BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress); + final int newNodeAddress = dictBuffer.limit(); + dictBuffer.position(linkAddressPosition); + BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeAddress); final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); final int flags = BinaryDictEncoder.makeCharGroupFlags(characters.length > 1, @@ -490,7 +495,7 @@ public final class DynamicBinaryDictIOUtils { return; } else { depth--; - buffer.position(nextLink); + dictBuffer.position(nextLink); } } } diff --git a/java/src/com/android/inputmethod/latin/makedict/decoder/HeaderReaderInterface.java b/java/src/com/android/inputmethod/latin/makedict/decoder/HeaderReader.java similarity index 96% rename from java/src/com/android/inputmethod/latin/makedict/decoder/HeaderReaderInterface.java rename to java/src/com/android/inputmethod/latin/makedict/decoder/HeaderReader.java index 7cddef2a4..f2badb444 100644 --- a/java/src/com/android/inputmethod/latin/makedict/decoder/HeaderReaderInterface.java +++ b/java/src/com/android/inputmethod/latin/makedict/decoder/HeaderReader.java @@ -24,7 +24,7 @@ import java.util.HashMap; /** * An interface to read a binary dictionary file header. 
*/ -public interface HeaderReaderInterface { +public interface HeaderReader { public int readVersion() throws IOException, UnsupportedFormatException; public int readOptionFlags(); public int readHeaderSize(); diff --git a/java/src/com/android/inputmethod/latin/personalization/DynamicPredictionDictionaryBase.java b/java/src/com/android/inputmethod/latin/personalization/DynamicPredictionDictionaryBase.java index 525d3cd11..916be4481 100644 --- a/java/src/com/android/inputmethod/latin/personalization/DynamicPredictionDictionaryBase.java +++ b/java/src/com/android/inputmethod/latin/personalization/DynamicPredictionDictionaryBase.java @@ -28,7 +28,7 @@ import com.android.inputmethod.latin.ExpandableDictionary; import com.android.inputmethod.latin.LatinImeLogger; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.WordComposer; -import com.android.inputmethod.latin.makedict.BinaryDictReader; +import com.android.inputmethod.latin.makedict.BinaryDictDecoder; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.settings.Settings; import com.android.inputmethod.latin.utils.CollectionUtils; @@ -241,10 +241,10 @@ public abstract class DynamicPredictionDictionaryBase extends ExpandableDictiona }; // Load the dictionary from binary file - final BinaryDictReader reader = new BinaryDictReader( + final BinaryDictDecoder reader = new BinaryDictDecoder( new File(getContext().getFilesDir(), fileName)); try { - reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); + reader.openDictBuffer(new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory()); UserHistoryDictIOUtils.readDictionaryBinary(reader, listener); } catch (FileNotFoundException e) { // This is an expected condition: we don't have a user history dictionary for this diff --git a/java/src/com/android/inputmethod/latin/utils/ByteArrayWrapper.java 
b/java/src/com/android/inputmethod/latin/utils/ByteArrayDictBuffer.java similarity index 89% rename from java/src/com/android/inputmethod/latin/utils/ByteArrayWrapper.java rename to java/src/com/android/inputmethod/latin/utils/ByteArrayDictBuffer.java index d93b61451..2028298f2 100644 --- a/java/src/com/android/inputmethod/latin/utils/ByteArrayWrapper.java +++ b/java/src/com/android/inputmethod/latin/utils/ByteArrayDictBuffer.java @@ -16,17 +16,17 @@ package com.android.inputmethod.latin.utils; -import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; /** * This class provides an implementation for the FusionDictionary buffer interface that is backed * by a simpled byte array. It allows to create a binary dictionary in memory. */ -public final class ByteArrayWrapper implements FusionDictionaryBufferInterface { +public final class ByteArrayDictBuffer implements DictBuffer { private byte[] mBuffer; private int mPosition; - public ByteArrayWrapper(final byte[] buffer) { + public ByteArrayDictBuffer(final byte[] buffer) { mBuffer = buffer; mPosition = 0; } diff --git a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java index cd03b3822..dd7f534dc 100644 --- a/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java +++ b/java/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtils.java @@ -22,7 +22,6 @@ import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.latin.makedict.BinaryDictDecoder; import com.android.inputmethod.latin.makedict.BinaryDictEncoder; import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; -import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import 
com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; @@ -119,13 +118,13 @@ public final class UserHistoryDictIOUtils { /** * Reads dictionary from file. */ - public static void readDictionaryBinary(final BinaryDictReader reader, + public static void readDictionaryBinary(final BinaryDictDecoder dictDecoder, final OnAddWordListener dict) { final Map unigrams = CollectionUtils.newTreeMap(); final Map frequencies = CollectionUtils.newTreeMap(); final Map> bigrams = CollectionUtils.newTreeMap(); try { - BinaryDictIOUtils.readUnigramsAndBigramsBinary(reader, unigrams, frequencies, + BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, unigrams, frequencies, bigrams); } catch (IOException e) { Log.e(TAG, "IO exception while reading file", e); @@ -157,7 +156,7 @@ public final class UserHistoryDictIOUtils { continue; } to.setBigram(word1, word2, - BinaryDictDecoder.reconstructBigramFrequency(unigramFrequency, + BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency, attr.mFrequency)); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java index be468c19b..77397a68c 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderEncoderTests.java @@ -22,7 +22,7 @@ import android.test.suitebuilder.annotation.LargeTest; import android.util.Log; import android.util.SparseArray; -import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import 
com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; @@ -44,7 +44,7 @@ import java.util.Random; import java.util.Set; /** - * Unit tests for BinaryDictDecoder and BinaryDictEncoder. + * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoder. */ @LargeTest public class BinaryDictDecoderEncoderTests extends AndroidTestCase { @@ -118,14 +118,16 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // Utilities for test /** - * Makes new buffer according to BUFFER_TYPE. + * Makes new DictBuffer according to BUFFER_TYPE. */ - private void getBuffer(final BinaryDictReader reader, final int bufferType) + private void getDictBuffer(final BinaryDictDecoder dictDecoder, final int bufferType) throws FileNotFoundException, IOException { if (bufferType == USE_BYTE_BUFFER) { - reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); + dictDecoder.openDictBuffer( + new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory()); } else if (bufferType == USE_BYTE_ARRAY) { - reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); + dictDecoder.openDictBuffer( + new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory()); } } @@ -269,14 +271,14 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final SparseArray> bigrams, final Map> shortcutMap, final int bufferType) { long now, diff = -1; - final BinaryDictReader reader = new BinaryDictReader(file); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); FusionDictionary dict = null; try { - getBuffer(reader, bufferType); - assertNotNull(reader.getBuffer()); + getDictBuffer(dictDecoder, bufferType); + assertNotNull(dictDecoder.getDictBuffer()); now = System.currentTimeMillis(); - dict = BinaryDictDecoder.readDictionaryBinary(reader, null); + dict = BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null); diff = System.currentTimeMillis() - now; } catch (IOException e) { Log.e(TAG, 
"IOException while reading dictionary", e); @@ -388,7 +390,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } actBigrams.get(word1).add(word2); - final int bigramFreq = BinaryDictDecoder.reconstructBigramFrequency( + final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency( unigramFreq, attr.mFrequency); assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); } @@ -407,12 +409,12 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { final Map resultFreqs = CollectionUtils.newTreeMap(); long now = -1, diff = -1; - final BinaryDictReader reader = new BinaryDictReader(file); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); try { - getBuffer(reader, bufferType); - assertNotNull("Can't get buffer.", reader.getBuffer()); + getDictBuffer(dictDecoder, bufferType); + assertNotNull("Can't get buffer.", dictDecoder.getDictBuffer()); now = System.currentTimeMillis(); - BinaryDictIOUtils.readUnigramsAndBigramsBinary(reader, resultWords, resultFreqs, + BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, resultWords, resultFreqs, resultBigrams); diff = System.currentTimeMillis() - now; } catch (IOException e) { @@ -497,31 +499,31 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } // Tests for getTerminalPosition - private String getWordFromBinary(final BinaryDictReader dictReader, final int address) { - final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); - if (buffer.position() != 0) buffer.position(0); + private String getWordFromBinary(final BinaryDictDecoder dictDecoder, final int address) { + final DictBuffer dictBuffer = dictDecoder.getDictBuffer(); + if (dictBuffer.position() != 0) dictBuffer.position(0); FileHeader fileHeader = null; try { - fileHeader = BinaryDictDecoder.readHeader(dictReader); + fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder); } catch (IOException e) { return null; } catch (UnsupportedFormatException 
e) { return null; } if (fileHeader == null) return null; - return BinaryDictDecoder.getWordAtAddress(buffer, fileHeader.mHeaderSize, + return BinaryDictDecoderUtils.getWordAtAddress(dictBuffer, fileHeader.mHeaderSize, address - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord; } - private long runGetTerminalPosition(final BinaryDictReader reader, final String word, int index, - boolean contained) { + private long runGetTerminalPosition(final BinaryDictDecoder dictDecoder, final String word, + int index, boolean contained) { final int expectedFrequency = (UNIGRAM_FREQ + index) % 255; long diff = -1; int position = -1; try { final long now = System.nanoTime(); - position = BinaryDictIOUtils.getTerminalPosition(reader, word); + position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word); diff = System.nanoTime() - now; } catch (IOException e) { Log.e(TAG, "IOException while getTerminalPosition", e); @@ -530,7 +532,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { } assertEquals(FormatSpec.NOT_VALID_WORD != position, contained); - if (contained) assertEquals(getWordFromBinary(reader, position), word); + if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word); return diff; } @@ -550,28 +552,29 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); - final BinaryDictReader reader = new BinaryDictReader(file); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); try { - reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); + dictDecoder.openDictBuffer( + new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory()); } catch (IOException e) { // ignore Log.e(TAG, "IOException while opening the buffer", e); } - assertNotNull("Can't get the buffer", reader.getBuffer()); + assertNotNull("Can't get the buffer", 
dictDecoder.getDictBuffer()); try { // too long word final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; assertEquals(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(reader, longWord)); + BinaryDictIOUtils.getTerminalPosition(dictDecoder, longWord)); // null assertEquals(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(reader, null)); + BinaryDictIOUtils.getTerminalPosition(dictDecoder, null)); // empty string assertEquals(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(reader, "")); + BinaryDictIOUtils.getTerminalPosition(dictDecoder, "")); } catch (IOException e) { } catch (UnsupportedFormatException e) { } @@ -579,7 +582,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { // Test a word that is contained within the dictionary. long sum = 0; for (int i = 0; i < sWords.size(); ++i) { - final long time = runGetTerminalPosition(reader, sWords.get(i), i, true); + final long time = runGetTerminalPosition(dictDecoder, sWords.get(i), i, true); sum += time == -1 ? 
0 : time; } Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000)); @@ -590,7 +593,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { for (int i = 0; i < 1000; ++i) { final String word = generateWord(random, codePointSet); if (sWords.indexOf(word) != -1) continue; - runGetTerminalPosition(reader, word, i, false); + runGetTerminalPosition(dictDecoder, word, i, false); } } @@ -610,28 +613,28 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase { addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); - final BinaryDictReader reader = new BinaryDictReader(file); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); try { - reader.openBuffer( - new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); + dictDecoder.openDictBuffer( + new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory()); } catch (IOException e) { // ignore Log.e(TAG, "IOException while opening the buffer", e); } - assertNotNull("Can't get the buffer", reader.getBuffer()); + assertNotNull("Can't get the buffer", dictDecoder.getDictBuffer()); try { MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0))); - DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(0)); + BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(0))); + DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(0)); assertEquals(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0))); + BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(0))); MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5))); - DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(5)); + BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(5))); + DynamicBinaryDictIOUtils.deleteWord(dictDecoder, 
sWords.get(5)); assertEquals(FormatSpec.NOT_VALID_WORD, - BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5))); + BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(5))); } catch (IOException e) { } catch (UnsupportedFormatException e) { } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictReaderTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderTests.java similarity index 67% rename from tests/src/com/android/inputmethod/latin/makedict/BinaryDictReaderTests.java rename to tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderTests.java index 1c6de50f0..03742c4c1 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictReaderTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictDecoderTests.java @@ -16,14 +16,14 @@ package com.android.inputmethod.latin.makedict; -import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; -import com.android.inputmethod.latin.makedict.BinaryDictReader.FusionDictionaryBufferFactory; -import com.android.inputmethod.latin.makedict.BinaryDictReader. - FusionDictionaryBufferFromByteArrayFactory; -import com.android.inputmethod.latin.makedict.BinaryDictReader. - FusionDictionaryBufferFromByteBufferFactory; -import com.android.inputmethod.latin.makedict.BinaryDictReader. - FusionDictionaryBufferFromWritableByteBufferFactory; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; +import com.android.inputmethod.latin.makedict.BinaryDictDecoder.DictionaryBufferFactory; +import com.android.inputmethod.latin.makedict.BinaryDictDecoder. + DictionaryBufferFromByteArrayFactory; +import com.android.inputmethod.latin.makedict.BinaryDictDecoder. + DictionaryBufferFromReadOnlyByteBufferFactory; +import com.android.inputmethod.latin.makedict.BinaryDictDecoder. 
+ DictionaryBufferFromWritableByteBufferFactory; import android.test.AndroidTestCase; import android.util.Log; @@ -33,10 +33,10 @@ import java.io.FileOutputStream; import java.io.IOException; /** - * Unit tests for BinaryDictReader + * Unit tests for BinaryDictDecoder */ -public class BinaryDictReaderTests extends AndroidTestCase { - private static final String TAG = BinaryDictReaderTests.class.getSimpleName(); +public class BinaryDictDecoderTests extends AndroidTestCase { + private static final String TAG = BinaryDictDecoderTests.class.getSimpleName(); private final byte[] data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; @@ -61,7 +61,7 @@ public class BinaryDictReaderTests extends AndroidTestCase { @SuppressWarnings("null") public void runTestOpenBuffer(final String testName, - final FusionDictionaryBufferFactory factory) { + final DictionaryBufferFactory factory) { File testFile = null; try { testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir()); @@ -70,9 +70,9 @@ public class BinaryDictReaderTests extends AndroidTestCase { } assertNotNull(testFile); - final BinaryDictReader reader = new BinaryDictReader(testFile); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(testFile); try { - reader.openBuffer(factory); + dictDecoder.openDictBuffer(factory); } catch (Exception e) { Log.e(TAG, "Failed to open the buffer", e); } @@ -80,32 +80,32 @@ public class BinaryDictReaderTests extends AndroidTestCase { writeDataToFile(testFile); try { - reader.openBuffer(factory); + dictDecoder.openDictBuffer(factory); } catch (Exception e) { Log.e(TAG, "Raised the exception while opening buffer", e); } - assertEquals(testFile.length(), reader.getBuffer().capacity()); + assertEquals(testFile.length(), dictDecoder.getDictBuffer().capacity()); } public void testOpenBufferWithByteBuffer() { runTestOpenBuffer("testOpenBufferWithByteBuffer", - new FusionDictionaryBufferFromByteBufferFactory()); + new DictionaryBufferFromReadOnlyByteBufferFactory()); } public 
void testOpenBufferWithByteArray() { runTestOpenBuffer("testOpenBufferWithByteArray", - new FusionDictionaryBufferFromByteArrayFactory()); + new DictionaryBufferFromByteArrayFactory()); } public void testOpenBufferWithWritableByteBuffer() { runTestOpenBuffer("testOpenBufferWithWritableByteBuffer", - new FusionDictionaryBufferFromWritableByteBufferFactory()); + new DictionaryBufferFromWritableByteBufferFactory()); } @SuppressWarnings("null") public void runTestGetBuffer(final String testName, - final FusionDictionaryBufferFactory factory) { + final DictionaryBufferFactory factory) { File testFile = null; try { testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir()); @@ -113,40 +113,41 @@ public class BinaryDictReaderTests extends AndroidTestCase { Log.e(TAG, "IOException while the creating temporary file", e); } - final BinaryDictReader reader = new BinaryDictReader(testFile); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(testFile); // the default return value of getBuffer() must be null. 
- assertNull("the default return value of getBuffer() is not null", reader.getBuffer()); + assertNull("the default return value of getBuffer() is not null", + dictDecoder.getDictBuffer()); writeDataToFile(testFile); assertTrue(testFile.exists()); Log.d(TAG, "file length = " + testFile.length()); - FusionDictionaryBufferInterface buffer = null; + DictBuffer dictBuffer = null; try { - buffer = reader.openAndGetBuffer(factory); + dictBuffer = dictDecoder.openAndGetDictBuffer(factory); } catch (IOException e) { Log.e(TAG, "Failed to open and get the buffer", e); } - assertNotNull("the buffer must not be null", buffer); + assertNotNull("the buffer must not be null", dictBuffer); for (int i = 0; i < data.length; ++i) { - assertEquals(data[i], buffer.readUnsignedByte()); + assertEquals(data[i], dictBuffer.readUnsignedByte()); } } public void testGetBufferWithByteBuffer() { runTestGetBuffer("testGetBufferWithByteBuffer", - new FusionDictionaryBufferFromByteBufferFactory()); + new DictionaryBufferFromReadOnlyByteBufferFactory()); } public void testGetBufferWithByteArray() { runTestGetBuffer("testGetBufferWithByteArray", - new FusionDictionaryBufferFromByteArrayFactory()); + new DictionaryBufferFromByteArrayFactory()); } public void testGetBufferWithWritableByteBuffer() { runTestGetBuffer("testGetBufferWithWritableByteBuffer", - new FusionDictionaryBufferFromWritableByteBufferFactory()); + new DictionaryBufferFromWritableByteBufferFactory()); } } diff --git a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java index bcf2c3187..7c0f07d43 100644 --- a/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/makedict/BinaryDictIOUtilsTests.java @@ -21,9 +21,9 @@ import android.test.MoreAsserts; import android.test.suitebuilder.annotation.LargeTest; import android.util.Log; -import 
com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; -import com.android.inputmethod.latin.makedict.BinaryDictReader. - FusionDictionaryBufferFromWritableByteBufferFactory; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer; +import com.android.inputmethod.latin.makedict.BinaryDictDecoder. + DictionaryBufferFromWritableByteBufferFactory; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; @@ -112,26 +112,26 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { Log.d(TAG, " end address = " + info.mEndAddress); } - private static void printNode(final FusionDictionaryBufferInterface buffer, + private static void printNode(final DictBuffer dictBuffer, final FormatSpec.FormatOptions formatOptions) { - Log.d(TAG, "Node at " + buffer.position()); - final int count = BinaryDictDecoder.readCharGroupCount(buffer); + Log.d(TAG, "Node at " + dictBuffer.position()); + final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer); Log.d(TAG, " charGroupCount = " + count); for (int i = 0; i < count; ++i) { - final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer, - buffer.position(), formatOptions); + final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(dictBuffer, + dictBuffer.position(), formatOptions); printCharGroup(currentInfo); } if (formatOptions.mSupportsDynamicUpdate) { - final int forwardLinkAddress = buffer.readUnsignedInt24(); + final int forwardLinkAddress = dictBuffer.readUnsignedInt24(); Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress); } } - private static void printBinaryFile(final BinaryDictReader dictReader) + private static void printBinaryFile(final BinaryDictDecoder dictDecoder) throws IOException, UnsupportedFormatException { - final FileHeader fileHeader = 
BinaryDictDecoder.readHeader(dictReader); - final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); + final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder); + final DictBuffer buffer = dictDecoder.getDictBuffer(); while (buffer.position() < buffer.limit()) { printNode(buffer, fileHeader.mFormatOptions); } @@ -139,13 +139,13 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { private int getWordPosition(final File file, final String word) { int position = FormatSpec.NOT_VALID_WORD; - final BinaryDictReader dictReader = new BinaryDictReader(file); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); FileInputStream inStream = null; try { inStream = new FileInputStream(file); - dictReader.openBuffer( - new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); - position = BinaryDictIOUtils.getTerminalPosition(dictReader, word); + dictDecoder.openDictBuffer( + new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory()); + position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word); } catch (IOException e) { } catch (UnsupportedFormatException e) { } finally { @@ -161,12 +161,12 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { } private CharGroupInfo findWordFromFile(final File file, final String word) { - final BinaryDictReader dictReader = new BinaryDictReader(file); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); CharGroupInfo info = null; try { - dictReader.openBuffer( - new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); - info = BinaryDictIOUtils.findWordByBinaryDictReader(dictReader, word); + dictDecoder.openDictBuffer( + new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory()); + info = BinaryDictIOUtils.findWordByBinaryDictReader(dictDecoder, word); } catch (IOException e) { } catch (UnsupportedFormatException e) { } @@ -177,18 +177,18 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { 
private long insertAndCheckWord(final File file, final String word, final int frequency, final boolean exist, final ArrayList bigrams, final ArrayList shortcuts) { - final BinaryDictReader dictReader = new BinaryDictReader(file); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); BufferedOutputStream outStream = null; long amountOfTime = -1; try { - dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory()); + dictDecoder.openDictBuffer(new DictionaryBufferFromWritableByteBufferFactory()); outStream = new BufferedOutputStream(new FileOutputStream(file, true)); if (!exist) { assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); } final long now = System.nanoTime(); - DynamicBinaryDictIOUtils.insertWord(dictReader, outStream, word, frequency, bigrams, + DynamicBinaryDictIOUtils.insertWord(dictDecoder, outStream, word, frequency, bigrams, shortcuts, false, false); amountOfTime = System.nanoTime() - now; outStream.flush(); @@ -211,23 +211,23 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase { } private void deleteWord(final File file, final String word) { - final BinaryDictReader dictReader = new BinaryDictReader(file); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); try { - dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory()); - DynamicBinaryDictIOUtils.deleteWord(dictReader, word); + dictDecoder.openDictBuffer(new DictionaryBufferFromWritableByteBufferFactory()); + DynamicBinaryDictIOUtils.deleteWord(dictDecoder, word); } catch (IOException e) { } catch (UnsupportedFormatException e) { } } private void checkReverseLookup(final File file, final String word, final int position) { - final BinaryDictReader dictReader = new BinaryDictReader(file); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); try { - final FusionDictionaryBufferInterface buffer = dictReader.openAndGetBuffer( - new 
BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); - final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader); + final DictBuffer dictBuffer = dictDecoder.openAndGetDictBuffer( + new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory()); + final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder); assertEquals(word, - BinaryDictDecoder.getWordAtAddress(dictReader.getBuffer(), + BinaryDictDecoderUtils.getWordAtAddress(dictDecoder.getDictBuffer(), fileHeader.mHeaderSize, position - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord); } catch (IOException e) { diff --git a/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java b/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java index 8f5bec8a4..83d9c2122 100644 --- a/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java +++ b/tests/src/com/android/inputmethod/latin/utils/UserHistoryDictIOUtilsTests.java @@ -21,7 +21,7 @@ import android.test.AndroidTestCase; import android.test.suitebuilder.annotation.LargeTest; import android.util.Log; -import com.android.inputmethod.latin.makedict.BinaryDictReader; +import com.android.inputmethod.latin.makedict.BinaryDictDecoder; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; @@ -147,15 +147,16 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase } private void readDictFromFile(final File file, final OnAddWordListener listener) { - final BinaryDictReader reader = new BinaryDictReader(file); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); try { - reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); + dictDecoder.openDictBuffer( + new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory()); } catch (FileNotFoundException e) { 
Log.e(TAG, "file not found", e); } catch (IOException e) { Log.e(TAG, "IOException", e); } - UserHistoryDictIOUtils.readDictionaryBinary(reader, listener); + UserHistoryDictIOUtils.readDictionaryBinary(dictDecoder, listener); } public void testGenerateFusionDictionary() { diff --git a/tools/dicttool/Android.mk b/tools/dicttool/Android.mk index f076ef277..3d09c0508 100644 --- a/tools/dicttool/Android.mk +++ b/tools/dicttool/Android.mk @@ -28,7 +28,7 @@ LATINIME_ANNOTATIONS_SOURCE_DIRECTORY := $(LATINIME_BASE_SOURCE_DIRECTORY)/annot LATINIME_CORE_SOURCE_DIRECTORY := $(LATINIME_BASE_SOURCE_DIRECTORY)/latin MAKEDICT_CORE_SOURCE_DIRECTORY := $(LATINIME_CORE_SOURCE_DIRECTORY)/makedict USED_TARGETTED_UTILS := \ - $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayWrapper.java \ + $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java index 4ddfb405f..39ba69b1f 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtils.java @@ -16,8 +16,8 @@ package com.android.inputmethod.latin.dicttool; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils; import com.android.inputmethod.latin.makedict.BinaryDictDecoder; -import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.UnsupportedFormatException; @@ -97,7 +97,7 @@ public final class BinaryDictOffdeviceUtils { // over and over, ending in a stack overflow. Hence we limit the depth at which we try // decoding the file. 
if (depth > MAX_DECODE_DEPTH) return null; - if (BinaryDictDecoder.isBinaryDictionary(src)) { + if (BinaryDictDecoderUtils.isBinaryDictionary(src)) { spec.mFile = src; return spec; } @@ -184,15 +184,15 @@ public final class BinaryDictOffdeviceUtils { crash(filename, new RuntimeException( filename + " does not seem to be a dictionary file")); } else { - final BinaryDictReader reader = new BinaryDictReader(decodedSpec.mFile); - reader.openBuffer( - new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(decodedSpec.mFile); + dictDecoder.openDictBuffer( + new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory()); if (report) { System.out.println("Format : Binary dictionary format"); System.out.println("Packaging : " + decodedSpec.describeChain()); System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); } - return BinaryDictDecoder.readDictionaryBinary(reader, null); + return BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null); } } } catch (IOException e) { diff --git a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java index 09052c937..f87e9722c 100644 --- a/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java +++ b/tools/dicttool/src/com/android/inputmethod/latin/dicttool/DictionaryMaker.java @@ -16,9 +16,9 @@ package com.android.inputmethod.latin.dicttool; -import com.android.inputmethod.latin.makedict.BinaryDictDecoder; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils; import com.android.inputmethod.latin.makedict.BinaryDictEncoder; -import com.android.inputmethod.latin.makedict.BinaryDictReader; +import com.android.inputmethod.latin.makedict.BinaryDictDecoder; import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FusionDictionary; import 
com.android.inputmethod.latin.makedict.MakedictLog; @@ -176,7 +176,7 @@ public class DictionaryMaker { inputUnigramXml = filename; } else if (CombinedInputOutput.isCombinedDictionary(filename)) { inputCombined = filename; - } else if (BinaryDictDecoder.isBinaryDictionary(filename)) { + } else if (BinaryDictDecoderUtils.isBinaryDictionary(filename)) { inputBinary = filename; } else { throw new IllegalArgumentException( @@ -198,7 +198,7 @@ public class DictionaryMaker { } } else { if (null == inputBinary && null == inputUnigramXml) { - if (BinaryDictDecoder.isBinaryDictionary(arg)) { + if (BinaryDictDecoderUtils.isBinaryDictionary(arg)) { inputBinary = arg; } else if (CombinedInputOutput.isCombinedDictionary(arg)) { inputCombined = arg; @@ -266,9 +266,10 @@ public class DictionaryMaker { private static FusionDictionary readBinaryFile(final String binaryFilename) throws FileNotFoundException, IOException, UnsupportedFormatException { final File file = new File(binaryFilename); - final BinaryDictReader reader = new BinaryDictReader(file); - reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); - return BinaryDictDecoder.readDictionaryBinary(reader, null); + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file); + dictDecoder.openDictBuffer( + new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory()); + return BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null); } /** diff --git a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java index 3bda77fe9..b960b035d 100644 --- a/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java +++ b/tools/dicttool/tests/com/android/inputmethod/latin/dicttool/BinaryDictOffdeviceUtilsTests.java @@ -17,8 +17,8 @@ package com.android.inputmethod.latin.dicttool; import 
com.android.inputmethod.latin.makedict.BinaryDictDecoder; +import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils; import com.android.inputmethod.latin.makedict.BinaryDictEncoder; -import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; @@ -67,9 +67,10 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase { assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step); } assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size()); - final BinaryDictReader reader = new BinaryDictReader(decodeSpec.mFile); - reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); - final FusionDictionary resultDict = BinaryDictDecoder.readDictionaryBinary(reader, + final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(decodeSpec.mFile); + dictDecoder.openDictBuffer( + new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory()); + final FusionDictionary resultDict = BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null /* dict : an optional dictionary to add words to, or null */); assertEquals("Dictionary can't be read back correctly", FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(),