am 77bce05e: [Refactor] Rename BinaryDictReader and BinaryDictDecoder.

* commit '77bce05e6f6e3a988253f9305ae22e51f56f5b1a':
  [Refactor] Rename BinaryDictReader and BinaryDictDecoder.
This commit is contained in:
Yuichiro Hanada 2013-08-19 03:51:22 -07:00 committed by Android Git Automerger
commit 4893fe5719
19 changed files with 1231 additions and 1202 deletions

View file

@ -21,7 +21,7 @@ import android.content.SharedPreferences;
import android.content.res.AssetFileDescriptor; import android.content.res.AssetFileDescriptor;
import android.util.Log; import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.DictionaryInfoUtils; import com.android.inputmethod.latin.utils.DictionaryInfoUtils;
@ -231,17 +231,17 @@ final public class BinaryDictionaryGetter {
try { try {
// Read the version of the file // Read the version of the file
inStream = new FileInputStream(f); inStream = new FileInputStream(f);
final BinaryDictDecoder.ByteBufferWrapper buffer = final BinaryDictDecoderUtils.ByteBufferDictBuffer dictBuffer =
new BinaryDictDecoder.ByteBufferWrapper(inStream.getChannel().map( new BinaryDictDecoderUtils.ByteBufferDictBuffer(inStream.getChannel().map(
FileChannel.MapMode.READ_ONLY, 0, f.length())); FileChannel.MapMode.READ_ONLY, 0, f.length()));
final int magic = buffer.readInt(); final int magic = dictBuffer.readInt();
if (magic != FormatSpec.MAGIC_NUMBER) { if (magic != FormatSpec.MAGIC_NUMBER) {
return false; return false;
} }
final int formatVersion = buffer.readInt(); final int formatVersion = dictBuffer.readInt();
final int headerSize = buffer.readInt(); final int headerSize = dictBuffer.readInt();
final HashMap<String, String> options = CollectionUtils.newHashMap(); final HashMap<String, String> options = CollectionUtils.newHashMap();
BinaryDictDecoder.populateOptions(buffer, headerSize, options); BinaryDictDecoderUtils.populateOptions(dictBuffer, headerSize, options);
final String version = options.get(VERSION_KEY); final String version = options.get(VERSION_KEY);
if (null == version) { if (null == version) {

View file

@ -17,35 +17,23 @@
package com.android.inputmethod.latin.makedict; package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.decoder.HeaderReader;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
import com.android.inputmethod.latin.utils.JniUtils; import com.android.inputmethod.latin.utils.JniUtils;
import java.io.ByteArrayOutputStream;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
import java.io.FileNotFoundException; import java.io.FileNotFoundException;
import java.io.IOException; import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer; import java.nio.ByteBuffer;
import java.nio.channels.FileChannel; import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap; import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
/** @UsedForTesting
* Decodes binary files for a FusionDictionary. public class BinaryDictDecoder implements HeaderReader {
*
* All the methods in this class are static.
*/
public final class BinaryDictDecoder {
private static final boolean DBG = MakedictLog.DBG;
static { static {
JniUtils.loadNativeLibrary(); JniUtils.loadNativeLibrary();
@ -54,742 +42,148 @@ public final class BinaryDictDecoder {
// TODO: implement something sensical instead of just a phony method // TODO: implement something sensical instead of just a phony method
private static native int doNothing(); private static native int doNothing();
private BinaryDictDecoder() { public interface DictionaryBufferFactory {
// This utility class is not publicly instantiable. public DictBuffer getDictionaryBuffer(final File file)
} throws FileNotFoundException, IOException;
private static final int MAX_JUMPS = 12;
/**
 * Abstraction over a positioned byte buffer holding a binary dictionary.
 *
 * The read methods consume bytes starting at the current position; callers in this file
 * rely on {@link #position()} moving forward as data is read.
 */
@UsedForTesting
public interface FusionDictionaryBufferInterface {
    // ---- Cursor handling ----

    /** @return the current read/write position. */
    int position();

    /**
     * Moves the cursor.
     *
     * @param newPosition the position to move to.
     */
    void position(int newPosition);

    // ---- Reads ----

    /** @return the next byte, as an unsigned value. */
    int readUnsignedByte();

    /** @return the next two bytes, as an unsigned value. */
    int readUnsignedShort();

    /** @return the next three bytes, as an unsigned value. */
    int readUnsignedInt24();

    /** @return the next four bytes, as an int. */
    int readInt();

    // ---- Writes ----

    /**
     * Writes a single byte into the buffer.
     *
     * @param b the byte to write.
     */
    void put(final byte b);

    // ---- Bounds ----

    /** @return the limit of the underlying buffer. */
    int limit();

    /** @return the capacity of the underlying buffer. */
    @UsedForTesting
    int capacity();
}
public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface {
private ByteBuffer mBuffer;
/**
 * Creates a wrapper around the given buffer.
 *
 * The buffer is stored as is (not copied).
 *
 * @param buffer the ByteBuffer to wrap.
 */
public ByteBufferWrapper(final ByteBuffer buffer) {
mBuffer = buffer;
}
/**
 * Reads one byte from the wrapped buffer.
 *
 * @return the byte, widened to an int in the range 0..255.
 */
@Override
public int readUnsignedByte() {
    final byte signedValue = mBuffer.get();
    // Mask off the sign extension so the caller gets an unsigned value.
    return signedValue & 0xFF;
}
/**
 * Reads one short from the wrapped buffer.
 *
 * @return the short, widened to an int in the range 0..65535.
 */
@Override
public int readUnsignedShort() {
    final short signedValue = mBuffer.getShort();
    // Mask off the sign extension so the caller gets an unsigned value.
    return signedValue & 0xFFFF;
}
/**
 * Reads a 24-bit unsigned value: one unsigned byte supplying bits 16-23, followed by one
 * unsigned short supplying the low 16 bits.
 *
 * @return the assembled value.
 */
@Override
public int readUnsignedInt24() {
    // The byte must be consumed before the short; keep the two reads in this order.
    final int highByte = readUnsignedByte();
    final int lowShort = readUnsignedShort();
    return (highByte << 16) + lowShort;
}
/**
 * Reads an int from the wrapped buffer via {@link ByteBuffer#getInt()}.
 *
 * @return the value read.
 */
@Override
public int readInt() {
return mBuffer.getInt();
}
/**
 * @return the current position of the wrapped buffer.
 */
@Override
public int position() {
return mBuffer.position();
}
/**
 * Sets the position of the wrapped buffer.
 *
 * @param newPos the position to move to; passed unchanged to the wrapped buffer.
 */
@Override
public void position(int newPos) {
mBuffer.position(newPos);
}
/**
 * Writes one byte into the wrapped buffer via {@link ByteBuffer#put(byte)}.
 *
 * @param b the byte to write.
 */
@Override
public void put(final byte b) {
mBuffer.put(b);
}
/**
 * @return the limit of the wrapped buffer.
 */
@Override
public int limit() {
return mBuffer.limit();
}
/**
 * @return the capacity of the wrapped buffer.
 */
@Override
public int capacity() {
return mBuffer.capacity();
}
} }
/** /**
* A class grouping utility function for our specific character encoding. * Creates DictionaryBuffer using a ByteBuffer
*/
static final class CharEncoding {
    private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
    private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF;

    /**
     * Tells whether a character code lies in the range that is stored on a single byte.
     */
    private static boolean fitsOnOneByte(final int character) {
        if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
            return false;
        }
        return character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
    }

    /**
     * Returns the encoded size of a character.
     *
     * Char format is:
     * 1 byte = bbbbbbbb match
     * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
     * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
     *       unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
     *       00011111 would be outside unicode.
     * else: iso-latin-1 code
     * This allows for the whole unicode range to be encoded, including chars outside of
     * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
     * characters which should never happen anyway (and still work, but take 3 bytes).
     *
     * @param character the character code.
     * @return the size in binary encoded-form, either 1 or 3 bytes.
     */
    static int getCharSize(final int character) {
        // See char encoding in FusionDictionary.java
        final boolean singleByte = fitsOnOneByte(character)
                || FormatSpec.INVALID_CHARACTER == character;
        return singleByte ? 1 : 3;
    }

    /**
     * Returns the total encoded size, in bytes, of every character in the array.
     */
    static int getCharArraySize(final int[] chars) {
        int total = 0;
        for (int i = 0; i < chars.length; ++i) {
            total += getCharSize(chars[i]);
        }
        return total;
    }

    /**
     * Encodes one code point into a byte array, on either one or three bytes.
     *
     * @param buffer the byte array to write to.
     * @param position the index at which to write.
     * @param codePoint the code point to encode.
     * @return the index right after the bytes written.
     */
    private static int putCodePoint(final byte[] buffer, final int position,
            final int codePoint) {
        int cursor = position;
        if (1 != getCharSize(codePoint)) {
            buffer[cursor++] = (byte)(0xFF & (codePoint >> 16));
            buffer[cursor++] = (byte)(0xFF & (codePoint >> 8));
            buffer[cursor++] = (byte)(0xFF & codePoint);
        } else {
            buffer[cursor++] = (byte)codePoint;
        }
        return cursor;
    }

    /**
     * Writes a char array to a byte buffer. No terminator is written.
     *
     * @param codePoints the code point array to write.
     * @param buffer the byte buffer to write to.
     * @param index the index in buffer to write the character array to.
     * @return the index after the last character.
     */
    static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) {
        int cursor = index;
        for (final int codePoint : codePoints) {
            cursor = putCodePoint(buffer, cursor, codePoint);
        }
        return cursor;
    }

    /**
     * Writes a string with our character format to a byte buffer.
     *
     * This will also write the terminator byte.
     *
     * @param buffer the byte buffer to write to.
     * @param origin the offset to write from.
     * @param word the string to write.
     * @return the size written, in bytes.
     */
    static int writeString(final byte[] buffer, final int origin,
            final String word) {
        final int charCount = word.length();
        int cursor = origin;
        int i = 0;
        // Step by code point, not by char, so surrogate pairs are encoded as one character.
        while (i < charCount) {
            cursor = putCodePoint(buffer, cursor, word.codePointAt(i));
            i = word.offsetByCodePoints(i, 1);
        }
        buffer[cursor++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR;
        return cursor - origin;
    }

    /**
     * Writes a string with our character format to a ByteArrayOutputStream.
     *
     * This will also write the terminator byte.
     *
     * @param buffer the ByteArrayOutputStream to write to.
     * @param word the string to write.
     */
    static void writeString(final ByteArrayOutputStream buffer, final String word) {
        final int charCount = word.length();
        int i = 0;
        while (i < charCount) {
            final int codePoint = word.codePointAt(i);
            if (1 != getCharSize(codePoint)) {
                buffer.write((byte) (0xFF & (codePoint >> 16)));
                buffer.write((byte) (0xFF & (codePoint >> 8)));
                buffer.write((byte) (0xFF & codePoint));
            } else {
                buffer.write((byte) codePoint);
            }
            i = word.offsetByCodePoints(i, 1);
        }
        buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR);
    }

    /**
     * Reads a terminated string from a buffer; the converse of the writeString methods.
     *
     * Characters are consumed until readChar reports FormatSpec.INVALID_CHARACTER, which is
     * not included in the result.
     */
    static String readString(final FusionDictionaryBufferInterface buffer) {
        final StringBuilder word = new StringBuilder();
        for (int character = readChar(buffer);
                FormatSpec.INVALID_CHARACTER != character;
                character = readChar(buffer)) {
            word.appendCodePoint(character);
        }
        return word.toString();
    }

    /**
     * Reads a character from the buffer.
     *
     * This follows the character format documented on getCharSize.
     *
     * @param buffer the buffer, positioned over an encoded character.
     * @return the character code, or FormatSpec.INVALID_CHARACTER if the terminator was read.
     */
    static int readChar(final FusionDictionaryBufferInterface buffer) {
        final int firstByte = buffer.readUnsignedByte();
        if (fitsOnOneByte(firstByte)) {
            return firstByte;
        }
        if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == firstByte) {
            return FormatSpec.INVALID_CHARACTER;
        }
        // Three-byte form: the first byte is the high part, the next two the low 16 bits.
        return (firstByte << 16) + buffer.readUnsignedShort();
    }
}
// Input methods: Read a binary dictionary to memory.
// readDictionaryBinary is the public entry point for them.
/**
 * Reads the children address field of a char group from the buffer.
 *
 * When the format supports dynamic update, the field is always three bytes long: zero
 * means there are no children, and the FormatSpec.MSB24 bit marks a negative value whose
 * magnitude is held in the remaining FormatSpec.SINT24_MAX bits.
 * Otherwise, the width of the field (none, one, two or three bytes) is selected by the
 * address type bits of the group flags, and the value is read as unsigned.
 *
 * @param buffer the buffer, positioned on the children address field.
 * @param optionFlags the flags of the char group being read.
 * @param options file format options.
 * @return the address as stored in the file, or FormatSpec.NO_CHILDREN_ADDRESS.
 */
static int readChildrenAddress(final FusionDictionaryBufferInterface buffer,
        final int optionFlags, final FormatOptions options) {
    if (options.mSupportsDynamicUpdate) {
        final int address = buffer.readUnsignedInt24();
        if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
        if ((address & FormatSpec.MSB24) != 0) {
            return -(address & FormatSpec.SINT24_MAX);
        } else {
            return address;
        }
    }
    switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) {
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
            return buffer.readUnsignedByte();
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
            return buffer.readUnsignedShort();
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
            return buffer.readUnsignedInt24();
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
        default:
            // Nothing is stored in the file in this case, so nothing is consumed.
            return FormatSpec.NO_CHILDREN_ADDRESS;
    }
}
/**
 * Reads the parent address field of a char group.
 *
 * The field is only read when BinaryDictIOUtils.supportsDynamicUpdate accepts the format
 * options; in that case it is a three-byte value whose FormatSpec.MSB24 bit marks a
 * negative value and whose remaining bits hold the magnitude. Otherwise nothing is
 * consumed from the buffer.
 *
 * @param buffer the buffer, positioned on the parent address field.
 * @param formatOptions file format options.
 * @return the signed parent address, or FormatSpec.NO_PARENT_ADDRESS.
 */
static int readParentAddress(final FusionDictionaryBufferInterface buffer,
        final FormatOptions formatOptions) {
    if (!BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
        return FormatSpec.NO_PARENT_ADDRESS;
    }
    final int rawAddress = buffer.readUnsignedInt24();
    final int magnitude = rawAddress & FormatSpec.SINT24_MAX;
    final boolean isNegative = (rawAddress & FormatSpec.MSB24) != 0;
    return isNegative ? -magnitude : magnitude;
}
// Scratch space used by readCharGroup to accumulate the characters of a multi-character
// group before copying them into an array of the exact size.
// NOTE(review): being static, this buffer is shared by every call to readCharGroup -
// confirm that callers never decode from several threads at once.
private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH];
/**
 * Reads one char group from the buffer, at its current position.
 *
 * The fields are consumed in this order: flags, parent address (only for formats that
 * support dynamic update), characters, frequency (only for terminals), children address,
 * shortcut targets (if flagged), bigrams (if flagged).
 *
 * @param buffer the buffer, positioned at the start of a char group.
 * @param originalGroupAddress the address of this group; it is the base from which the
 *        end address and the relative children/bigram addresses are computed.
 * @param options file format options.
 * @return a CharGroupInfo built from the data read.
 */
public static CharGroupInfo readCharGroup(final FusionDictionaryBufferInterface buffer,
final int originalGroupAddress, final FormatOptions options) {
// addressPointer follows the address of the next unread byte, in the same address space
// as originalGroupAddress.
int addressPointer = originalGroupAddress;
final int flags = buffer.readUnsignedByte();
++addressPointer;
final int parentAddress = readParentAddress(buffer, options);
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
// The parent address field is three bytes long.
addressPointer += 3;
}
final int characters[];
if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
int index = 0;
int character = CharEncoding.readChar(buffer);
addressPointer += CharEncoding.getCharSize(character);
// NOTE(review): the end of the character list is detected with the literal -1, while
// CharEncoding.readString compares against FormatSpec.INVALID_CHARACTER; presumably both
// are the same value - confirm.
while (-1 != character) {
// FusionDictionary is making sure that the length of the word is smaller than
// MAX_WORD_LENGTH.
// So we'll never write past the end of CHARACTER_BUFFER.
CHARACTER_BUFFER[index++] = character;
character = CharEncoding.readChar(buffer);
addressPointer += CharEncoding.getCharSize(character);
}
characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index);
} else {
// Single character: no terminator follows it.
final int character = CharEncoding.readChar(buffer);
addressPointer += CharEncoding.getCharSize(character);
characters = new int[] { character };
}
final int frequency;
if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
++addressPointer;
frequency = buffer.readUnsignedByte();
} else {
frequency = CharGroup.NOT_A_TERMINAL;
}
int childrenAddress = readChildrenAddress(buffer, flags, options);
if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
// The stored value is relative to the address of the children address field itself.
childrenAddress += addressPointer;
}
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
ArrayList<WeightedString> shortcutTargets = null;
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
final int pointerBefore = buffer.position();
shortcutTargets = new ArrayList<WeightedString>();
buffer.readUnsignedShort(); // Skip the size
// Each target is a flags byte followed by a terminated string; the list ends with the
// first target whose flags do not have FLAG_ATTRIBUTE_HAS_NEXT.
while (true) {
final int targetFlags = buffer.readUnsignedByte();
final String word = CharEncoding.readString(buffer);
shortcutTargets.add(new WeightedString(word,
targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY));
if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
}
// The size of the shortcut list is measured from how far the buffer moved.
addressPointer += buffer.position() - pointerBefore;
}
ArrayList<PendingAttribute> bigrams = null;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>();
int bigramCount = 0;
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
final int bigramFlags = buffer.readUnsignedByte();
++addressPointer;
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE)
? 1 : -1;
// The bigram offset is relative to the address right after the bigram flags byte.
int bigramAddress = addressPointer;
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
bigramAddress += sign * buffer.readUnsignedByte();
addressPointer += 1;
break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
bigramAddress += sign * buffer.readUnsignedShort();
addressPointer += 2;
break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
final int offset = (buffer.readUnsignedByte() << 16)
+ buffer.readUnsignedShort();
bigramAddress += sign * offset;
addressPointer += 3;
break;
default:
throw new RuntimeException("Has bigrams with no address");
}
bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY,
bigramAddress));
if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
}
// NOTE(review): bigramCount also equals MAX_BIGRAMS_IN_A_GROUP when exactly that many
// bigrams were read and the list ended normally, so this message is logged in that case
// too - confirm this is intended.
if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
MakedictLog.d("too many bigrams in a group.");
}
}
return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
parentAddress, childrenAddress, shortcutTargets, bigrams);
}
/**
 * Reads the char group count stored at the current position of the buffer.
 *
 * The count takes one byte when the first byte is not greater than
 * FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT. Otherwise it takes two bytes:
 * the first one, masked with that same constant, gives the high part, and the second one
 * gives the low 8 bits. The buffer position is left right after the count.
 *
 * @param buffer the buffer, positioned on a char group count.
 * @return the number of char groups.
 */
public static int readCharGroupCount(final FusionDictionaryBufferInterface buffer) {
    final int firstByte = buffer.readUnsignedByte();
    if (firstByte <= FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT) {
        return firstByte;
    }
    final int highPart =
            (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & firstByte) << 8;
    return highPart + buffer.readUnsignedByte();
}
/**
* Finds, as a string, the word at the address passed as an argument.
* *
* @param buffer the buffer to read from. * This class uses less memory than DictionaryBufferFromByteArrayFactory,
* @param headerSize the size of the header. * but doesn't perform as fast.
* @param address the address to seek. * When operating on a big dictionary, this class is preferred.
* @param formatOptions file format options.
* @return the word with its frequency, as a weighted string.
*/ */
/* package for tests */ static WeightedString getWordAtAddress( public static final class DictionaryBufferFromReadOnlyByteBufferFactory
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, implements DictionaryBufferFactory {
final FormatOptions formatOptions) { @Override
final WeightedString result; public DictBuffer getDictionaryBuffer(final File file)
final int originalPointer = buffer.position(); throws FileNotFoundException, IOException {
buffer.position(address); FileInputStream inStream = null;
ByteBuffer buffer = null;
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) { try {
result = getWordAtAddressWithParentAddress(buffer, headerSize, address, formatOptions); inStream = new FileInputStream(file);
} else { buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY,
result = getWordAtAddressWithoutParentAddress(buffer, headerSize, address, 0, file.length());
formatOptions); } finally {
} if (inStream != null) {
inStream.close();
buffer.position(originalPointer); }
return result; }
} if (buffer != null) {
return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer);
@SuppressWarnings("unused") }
private static WeightedString getWordAtAddressWithParentAddress( return null;
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address, }
final FormatOptions options) { }
int currentAddress = address;
int frequency = Integer.MIN_VALUE; /**
final StringBuilder builder = new StringBuilder(); * Creates DictionaryBuffer using a byte array
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH *
for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) { * This class performs faster than other classes, but consumes more memory.
CharGroupInfo currentInfo; * When operating on a small dictionary, this class is preferred.
int loopCounter = 0; */
do { public static final class DictionaryBufferFromByteArrayFactory
buffer.position(currentAddress + headerSize); implements DictionaryBufferFactory {
currentInfo = readCharGroup(buffer, currentAddress, options); @Override
if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) { public DictBuffer getDictionaryBuffer(final File file)
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress; throws FileNotFoundException, IOException {
} FileInputStream inStream = null;
if (DBG && loopCounter++ > MAX_JUMPS) { try {
MakedictLog.d("Too many jumps - probably a bug"); inStream = new FileInputStream(file);
} final byte[] array = new byte[(int) file.length()];
} while (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)); inStream.read(array);
if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency; return new ByteArrayDictBuffer(array);
builder.insert(0, } finally {
new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length)); if (inStream != null) {
if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
}
return new WeightedString(builder.toString(), frequency);
}
private static WeightedString getWordAtAddressWithoutParentAddress(
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
final FormatOptions options) {
buffer.position(headerSize);
final int count = readCharGroupCount(buffer);
int groupOffset = BinaryDictIOUtils.getGroupCountSize(count);
final StringBuilder builder = new StringBuilder();
WeightedString result = null;
CharGroupInfo last = null;
for (int i = count - 1; i >= 0; --i) {
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
groupOffset = info.mEndAddress;
if (info.mOriginalAddress == address) {
builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
result = new WeightedString(builder.toString(), info.mFrequency);
break; // and return
}
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
if (info.mChildrenAddress > address) {
if (null == last) continue;
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
buffer.position(last.mChildrenAddress + headerSize);
i = readCharGroupCount(buffer);
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
last = null;
continue;
}
last = info;
}
if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
buffer.position(last.mChildrenAddress + headerSize);
i = readCharGroupCount(buffer);
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
last = null;
continue;
}
}
return result;
}
/**
* Reads a single node array from a buffer.
*
* This methods reads the file at the current position. A node array is fully expected to start
* at the current position.
* This will recursively read other node arrays into the structure, populating the reverse
* maps on the fly and using them to keep track of already read nodes.
*
* @param buffer the buffer, correctly positioned at the start of a node array.
* @param headerSize the size, in bytes, of the file header.
* @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
* @param reverseGroupMap a mapping from addresses to already read character groups.
* @param options file format options.
* @return the read node array with all his children already read.
*/
private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer,
final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
throws IOException {
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
final int nodeArrayOrigin = buffer.position() - headerSize;
do { // Scan the linked-list node.
final int nodeArrayHeadPosition = buffer.position() - headerSize;
final int count = readCharGroupCount(buffer);
int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
ArrayList<WeightedString> bigrams = null;
if (null != info.mBigrams) {
bigrams = new ArrayList<WeightedString>();
for (PendingAttribute bigram : info.mBigrams) {
final WeightedString word = getWordAtAddress(
buffer, headerSize, bigram.mAddress, options);
final int reconstructedFrequency =
reconstructBigramFrequency(word.mFrequency, bigram.mFrequency);
bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
}
}
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
if (null == children) {
final int currentPosition = buffer.position();
buffer.position(info.mChildrenAddress + headerSize);
children = readNodeArray(
buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options);
buffer.position(currentPosition);
}
nodeArrayContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
info.mFrequency,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
} else {
nodeArrayContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
info.mFrequency,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
}
groupOffset = info.mEndAddress;
}
// reach the end of the array.
if (options.mSupportsDynamicUpdate) {
final int nextAddress = buffer.readUnsignedInt24();
if (nextAddress >= 0 && nextAddress < buffer.limit()) {
buffer.position(nextAddress);
} else {
break;
}
}
} while (options.mSupportsDynamicUpdate &&
buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
return nodeArray;
}
/**
* Helper function to get the binary format version from the header.
* @throws IOException
*/
private static int getFormatVersion(final FusionDictionaryBufferInterface buffer)
throws IOException {
final int magic = buffer.readInt();
if (FormatSpec.MAGIC_NUMBER == magic) return buffer.readUnsignedShort();
return FormatSpec.NOT_A_VERSION_NUMBER;
}
/**
* Helper function to get and validate the binary format version.
* @throws UnsupportedFormatException
* @throws IOException
*/
static int checkFormatVersion(final FusionDictionaryBufferInterface buffer)
throws IOException, UnsupportedFormatException {
final int version = getFormatVersion(buffer);
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
throw new UnsupportedFormatException("This file has version " + version
+ ", but this implementation does not support versions above "
+ FormatSpec.MAXIMUM_SUPPORTED_VERSION);
}
return version;
}
/**
* Reads a header from a buffer.
* @param headerReader the header reader
* @throws IOException
* @throws UnsupportedFormatException
*/
public static FileHeader readHeader(final HeaderReaderInterface headerReader)
throws IOException, UnsupportedFormatException {
final int version = headerReader.readVersion();
final int optionsFlags = headerReader.readOptionFlags();
final int headerSize = headerReader.readHeaderSize();
if (headerSize < 0) {
throw new UnsupportedFormatException("header size can't be negative.");
}
final HashMap<String, String> attributes = headerReader.readAttributes(headerSize);
final FileHeader header = new FileHeader(headerSize,
new FusionDictionary.DictionaryOptions(attributes,
0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
new FormatOptions(version,
0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
return header;
}
/**
* Reads options from a buffer and populate a map with their contents.
*
* The buffer is read at the current position, so the caller must take care the pointer
* is in the right place before calling this.
*/
public static void populateOptions(final FusionDictionaryBufferInterface buffer,
final int headerSize, final HashMap<String, String> options) {
while (buffer.position() < headerSize) {
final String key = CharEncoding.readString(buffer);
final String value = CharEncoding.readString(buffer);
options.put(key, value);
}
}
/**
* Reads a buffer and returns the memory representation of the dictionary.
*
* This high-level method takes a buffer and reads its contents, populating a
* FusionDictionary structure. The optional dict argument is an existing dictionary to
* which words from the buffer should be added. If it is null, a new dictionary is created.
*
* @param reader the reader.
* @param dict an optional dictionary to add words to, or null.
* @return the created (or merged) dictionary.
*/
@UsedForTesting
public static FusionDictionary readDictionaryBinary(final BinaryDictReader reader,
final FusionDictionary dict) throws FileNotFoundException, IOException,
UnsupportedFormatException {
// if the buffer has not been opened, open the buffer with bytebuffer.
if (reader.getBuffer() == null) reader.openBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
if (reader.getBuffer() == null) {
MakedictLog.e("Cannot open the buffer");
}
// Read header
final FileHeader fileHeader = readHeader(reader);
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
final PtNodeArray root = readNodeArray(reader.getBuffer(), fileHeader.mHeaderSize,
reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions);
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
if (null != dict) {
for (final Word w : dict) {
if (w.mIsBlacklistEntry) {
newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
} else {
newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
}
}
for (final Word w : dict) {
// By construction a binary dictionary may not have bigrams pointing to
// words that are not also registered as unigrams so we don't have to avoid
// them explicitly here.
for (final WeightedString bigram : w.mBigrams) {
newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency);
}
}
}
return newDict;
}
/**
* Helper method to pass a file name instead of a File object to isBinaryDictionary.
*/
public static boolean isBinaryDictionary(final String filename) {
final File file = new File(filename);
return isBinaryDictionary(file);
}
/**
* Basic test to find out whether the file is a binary dictionary or not.
*
* Concretely this only tests the magic number.
*
* @param file The file to test.
* @return true if it's a binary dictionary, false otherwise
*/
public static boolean isBinaryDictionary(final File file) {
FileInputStream inStream = null;
try {
inStream = new FileInputStream(file);
final ByteBuffer buffer = inStream.getChannel().map(
FileChannel.MapMode.READ_ONLY, 0, file.length());
final int version = getFormatVersion(new ByteBufferWrapper(buffer));
return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION
&& version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION);
} catch (FileNotFoundException e) {
return false;
} catch (IOException e) {
return false;
} finally {
if (inStream != null) {
try {
inStream.close(); inStream.close();
} catch (IOException e) {
// do nothing
} }
} }
} }
} }
/** /**
* Calculate bigram frequency from compressed value * Creates DictionaryBuffer using a writable ByteBuffer and a RandomAccessFile.
* *
* @param unigramFrequency * This class doesn't perform as fast as other classes,
* @param bigramFrequency compressed frequency * but this class is the only option available for destructive operations (insert or delete)
* @return approximate bigram frequency * on a dictionary.
*/ */
public static int reconstructBigramFrequency(final int unigramFrequency, @UsedForTesting
final int bigramFrequency) { public static final class DictionaryBufferFromWritableByteBufferFactory
final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency) implements DictionaryBufferFactory {
/ (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY); @Override
final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f); public DictBuffer getDictionaryBuffer(final File file)
return (int)resultFreqFloat; throws FileNotFoundException, IOException {
RandomAccessFile raFile = null;
ByteBuffer buffer = null;
try {
raFile = new RandomAccessFile(file, "rw");
buffer = raFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, file.length());
} finally {
if (raFile != null) {
raFile.close();
}
}
if (buffer != null) {
return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer);
}
return null;
}
}
// The binary dictionary file this decoder reads.
private final File mDictionaryBinaryFile;
// Buffer over the dictionary file. Starts out null; assigned by openDictBuffer().
private DictBuffer mDictBuffer;
/**
 * Creates a decoder for the given file. No buffer is opened by the constructor.
 *
 * @param file the binary dictionary file.
 */
public BinaryDictDecoder(final File file) {
mDictionaryBinaryFile = file;
mDictBuffer = null;
}
/**
 * Asks the given factory for a buffer over this decoder's file and stores it.
 *
 * @param factory the factory that creates the buffer.
 * @throws FileNotFoundException propagated from the factory.
 * @throws IOException propagated from the factory.
 */
public void openDictBuffer(final DictionaryBufferFactory factory)
        throws FileNotFoundException, IOException {
    final DictBuffer openedBuffer = factory.getDictionaryBuffer(mDictionaryBinaryFile);
    mDictBuffer = openedBuffer;
}
/**
 * Returns the buffer currently held by this decoder.
 *
 * @return the stored buffer, or null when none has been stored yet.
 */
public DictBuffer getDictBuffer() {
    return this.mDictBuffer;
}
/**
 * Opens the buffer with the given factory, then returns it.
 *
 * @param factory the factory that creates the buffer.
 * @return the buffer as reported by getDictBuffer() after opening.
 * @throws FileNotFoundException propagated from openDictBuffer.
 * @throws IOException propagated from openDictBuffer.
 */
@UsedForTesting
public DictBuffer openAndGetDictBuffer(final DictionaryBufferFactory factory)
        throws FileNotFoundException, IOException {
    this.openDictBuffer(factory);
    final DictBuffer opened = this.getDictBuffer();
    return opened;
}
// The implementation of HeaderReader
/**
 * Reads and validates the format version from the current buffer by delegating to
 * BinaryDictDecoderUtils.checkFormatVersion.
 */
@Override
public int readVersion() throws IOException, UnsupportedFormatException {
    final int version = BinaryDictDecoderUtils.checkFormatVersion(mDictBuffer);
    return version;
}
/**
 * Reads the option flags as an unsigned short from the buffer's current position.
 */
@Override
public int readOptionFlags() {
    final int optionFlags = mDictBuffer.readUnsignedShort();
    return optionFlags;
}
/**
 * Reads the header size as an int from the buffer's current position.
 */
@Override
public int readHeaderSize() {
    final int headerSize = mDictBuffer.readInt();
    return headerSize;
}
/**
 * Reads key/value string pairs from the buffer until its position reaches headerSize,
 * then sets the position to exactly headerSize.
 *
 * @param headerSize the buffer position at which the header ends.
 * @return a map of every key/value pair that was read.
 */
@Override
public HashMap<String, String> readAttributes(final int headerSize) {
final HashMap<String, String> attributes = new HashMap<String, String>();
while (mDictBuffer.position() < headerSize) {
// This loop terminates because mDictBuffer.position() always increases when
// CharEncoding.readString is called: it reads at least one byte per call.
final String key = CharEncoding.readString(mDictBuffer);
final String value = CharEncoding.readString(mDictBuffer);
attributes.put(key, value);
}
// The last pair may have read past headerSize; put the position back on the boundary.
mDictBuffer.position(headerSize);
return attributes;
} }
} }

View file

@ -0,0 +1,777 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.decoder.HeaderReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
/**
* Decodes binary files for a FusionDictionary.
*
* All the methods in this class are static.
*
* TODO: Remove calls from classes except BinaryDictDecoder
* TODO: Move this file to makedict/internal.
*/
public final class BinaryDictDecoderUtils {
// Debug switch taken from MakedictLog; gates the jump-count check in
// getWordAtAddressWithParentAddress.
private static final boolean DBG = MakedictLog.DBG;
private BinaryDictDecoderUtils() {
// This utility class is not publicly instantiable.
}
// Number of moved-group jumps beyond which a debug message is logged (only when DBG is set).
private static final int MAX_JUMPS = 12;
/**
 * Positional read/write access to the bytes of a binary dictionary.
 *
 * In the ByteBufferDictBuffer implementation, the read and put methods operate at the
 * current position and move it forward.
 */
@UsedForTesting
public interface DictBuffer {
    /** Reads one byte and returns it as an unsigned value. */
    int readUnsignedByte();

    /** Reads two bytes and returns them as an unsigned value. */
    int readUnsignedShort();

    /** Reads three bytes and returns them as an unsigned value. */
    int readUnsignedInt24();

    /** Reads four bytes and returns them as an int. */
    int readInt();

    /** Returns the current position. */
    int position();

    /** Moves the current position to newPosition. */
    void position(int newPosition);

    /** Writes one byte. */
    void put(byte b);

    /** Returns the limit of the buffer. */
    int limit();

    /** Returns the capacity of the buffer. */
    @UsedForTesting
    int capacity();
}
/**
 * DictBuffer implementation that forwards every operation to a wrapped ByteBuffer.
 *
 * Multi-byte reads use whatever byte order the wrapped buffer is configured with.
 */
public static final class ByteBufferDictBuffer implements DictBuffer {
    // The wrapped buffer. It is never reassigned, hence final.
    private final ByteBuffer mBuffer;

    /**
     * @param buffer the ByteBuffer to read from and write to.
     */
    public ByteBufferDictBuffer(final ByteBuffer buffer) {
        mBuffer = buffer;
    }

    /** Reads the next byte, masked to the range 0..255. */
    @Override
    public int readUnsignedByte() {
        return mBuffer.get() & 0xFF;
    }

    /** Reads the next two bytes, masked to the range 0..65535. */
    @Override
    public int readUnsignedShort() {
        return mBuffer.getShort() & 0xFFFF;
    }

    /**
     * Reads three bytes: one unsigned byte used as bits 16-23, followed by an
     * unsigned short used as bits 0-15.
     */
    @Override
    public int readUnsignedInt24() {
        final int retval = readUnsignedByte();
        return (retval << 16) + readUnsignedShort();
    }

    /** Reads the next four bytes as an int. */
    @Override
    public int readInt() {
        return mBuffer.getInt();
    }

    /** Returns the position of the wrapped buffer. */
    @Override
    public int position() {
        return mBuffer.position();
    }

    /** Sets the position of the wrapped buffer. */
    @Override
    public void position(int newPos) {
        mBuffer.position(newPos);
    }

    /** Writes one byte at the current position of the wrapped buffer. */
    @Override
    public void put(final byte b) {
        mBuffer.put(b);
    }

    /** Returns the limit of the wrapped buffer. */
    @Override
    public int limit() {
        return mBuffer.limit();
    }

    /** Returns the capacity of the wrapped buffer. */
    @Override
    public int capacity() {
        return mBuffer.capacity();
    }
}
/**
* A class grouping utility function for our specific character encoding.
*/
static final class CharEncoding {
// Inclusive bounds of the code points that are stored on a single byte.
private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF;

/**
 * Tells whether the given character code lies within the one-byte range.
 */
private static boolean fitsOnOneByte(final int character) {
    if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
        return false;
    }
    return character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
}
/**
 * Returns the number of bytes a character code occupies once encoded.
 *
 * The character format is the following. Read the first byte:
 *   - if it looks like 000xxxxx, the value is xxxxx << 16 + next byte << 8 + next byte,
 *     i.e. the character takes 3 bytes;
 *   - except 00011111 (= 0x1F), which is the terminator. This is a safe choice because
 *     unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
 *     00011111 would be outside unicode;
 *   - otherwise the byte is an iso-latin-1 code.
 * The whole unicode range can be encoded this way, including characters outside of the
 * BMP. Everything in the iso-latin-1 charset takes only 1 byte, except control characters,
 * which should never happen anyway (and still work, but take 3 bytes).
 *
 * @param character the character code.
 * @return the size in binary encoded-form, either 1 or 3 bytes.
 */
static int getCharSize(final int character) {
    // See char encoding in FusionDictionary.java
    final boolean takesOneByte = fitsOnOneByte(character)
            || FormatSpec.INVALID_CHARACTER == character;
    return takesOneByte ? 1 : 3;
}
/**
 * Sums the encoded sizes of every character code in the array.
 *
 * @param chars the character codes.
 * @return the total number of bytes needed to encode them.
 */
static int getCharArraySize(final int[] chars) {
    int totalBytes = 0;
    for (int i = 0; i < chars.length; ++i) {
        totalBytes += getCharSize(chars[i]);
    }
    return totalBytes;
}
/**
 * Encodes an array of code points into a byte array, without a terminator.
 *
 * @param codePoints the code point array to write.
 * @param buffer the byte buffer to write to.
 * @param index the index in buffer to write the character array to.
 * @return the index after the last character.
 */
static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) {
    int writePos = index;
    for (int i = 0; i < codePoints.length; ++i) {
        final int codePoint = codePoints[i];
        if (1 != getCharSize(codePoint)) {
            // Three-byte form, most significant byte first.
            buffer[writePos++] = (byte)(0xFF & (codePoint >> 16));
            buffer[writePos++] = (byte)(0xFF & (codePoint >> 8));
            buffer[writePos++] = (byte)(0xFF & codePoint);
        } else {
            buffer[writePos++] = (byte)codePoint;
        }
    }
    return writePos;
}
/**
 * Encodes a string into a byte array using our character format, followed by the
 * terminator byte.
 *
 * @param buffer the byte buffer to write to.
 * @param origin the offset to write from.
 * @param word the string to write.
 * @return the size written, in bytes.
 */
static int writeString(final byte[] buffer, final int origin,
        final String word) {
    final int length = word.length();
    int writePos = origin;
    int charIndex = 0;
    while (charIndex < length) {
        final int codePoint = word.codePointAt(charIndex);
        if (1 != getCharSize(codePoint)) {
            // Three-byte form, most significant byte first.
            buffer[writePos++] = (byte)(0xFF & (codePoint >> 16));
            buffer[writePos++] = (byte)(0xFF & (codePoint >> 8));
            buffer[writePos++] = (byte)(0xFF & codePoint);
        } else {
            buffer[writePos++] = (byte)codePoint;
        }
        // Step over one whole code point, which may span two chars.
        charIndex = word.offsetByCodePoints(charIndex, 1);
    }
    buffer[writePos++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR;
    return writePos - origin;
}
/**
 * Encodes a string into a ByteArrayOutputStream using our character format, followed by
 * the terminator byte.
 *
 * @param buffer the ByteArrayOutputStream to write to.
 * @param word the string to write.
 */
static void writeString(final ByteArrayOutputStream buffer, final String word) {
    final int length = word.length();
    int charIndex = 0;
    while (charIndex < length) {
        final int codePoint = word.codePointAt(charIndex);
        if (1 != getCharSize(codePoint)) {
            // Three-byte form, most significant byte first.
            buffer.write((byte) (0xFF & (codePoint >> 16)));
            buffer.write((byte) (0xFF & (codePoint >> 8)));
            buffer.write((byte) (0xFF & codePoint));
        } else {
            buffer.write((byte) codePoint);
        }
        // Step over one whole code point, which may span two chars.
        charIndex = word.offsetByCodePoints(charIndex, 1);
    }
    buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR);
}
/**
 * Decodes a string from a DictBuffer, consuming characters up to and including the
 * terminator. This is the converse of writeString.
 *
 * @param dictBuffer the buffer, positioned at the start of an encoded string.
 * @return the decoded string, without the terminator.
 */
static String readString(final DictBuffer dictBuffer) {
    final StringBuilder builder = new StringBuilder();
    for (int codePoint = readChar(dictBuffer);
            FormatSpec.INVALID_CHARACTER != codePoint;
            codePoint = readChar(dictBuffer)) {
        builder.appendCodePoint(codePoint);
    }
    return builder.toString();
}
/**
 * Decodes one character from the buffer.
 *
 * This follows the character format documented earlier in this source file.
 *
 * @param dictBuffer the buffer, positioned over an encoded character.
 * @return the character code, or FormatSpec.INVALID_CHARACTER when the terminator was read.
 */
static int readChar(final DictBuffer dictBuffer) {
    final int firstByte = dictBuffer.readUnsignedByte();
    if (fitsOnOneByte(firstByte)) {
        return firstByte;
    }
    if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == firstByte) {
        return FormatSpec.INVALID_CHARACTER;
    }
    // Three-byte form: the first byte holds the top bits, the next two the rest.
    return (firstByte << 16) + dictBuffer.readUnsignedShort();
}
}
// Input methods: Read a binary dictionary to memory.
// readDictionaryBinary is the public entry point for them.
/**
 * Reads the children address field of a character group from the current position.
 *
 * When the format supports dynamic updates the field is always three bytes: zero means
 * "no children", and a set MSB24 bit marks a negative value. Otherwise the field width
 * (none, one, two or three bytes) is selected by the address-type bits of the group flags.
 *
 * @param dictBuffer the buffer, positioned on the children address field.
 * @param optionFlags the flags of the character group being read.
 * @param options file format options.
 * @return the address as stored in the file, or FormatSpec.NO_CHILDREN_ADDRESS.
 */
static int readChildrenAddress(final DictBuffer dictBuffer,
        final int optionFlags, final FormatOptions options) {
    if (options.mSupportsDynamicUpdate) {
        final int address = dictBuffer.readUnsignedInt24();
        if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
        if ((address & FormatSpec.MSB24) != 0) {
            return -(address & FormatSpec.SINT24_MAX);
        } else {
            return address;
        }
    }
    switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) {
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
            return dictBuffer.readUnsignedByte();
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
            return dictBuffer.readUnsignedShort();
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
            return dictBuffer.readUnsignedInt24();
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
        default:
            return FormatSpec.NO_CHILDREN_ADDRESS;
    }
}
/**
 * Reads the parent address field of a character group, if the format has one.
 *
 * The field is three bytes; the MSB24 bit carries the sign and the remaining bits the
 * magnitude. Nothing is read when the format does not support dynamic updates.
 *
 * @param dictBuffer the buffer, positioned on the parent address field.
 * @param formatOptions file format options.
 * @return the signed parent address, or FormatSpec.NO_PARENT_ADDRESS.
 */
static int readParentAddress(final DictBuffer dictBuffer,
        final FormatOptions formatOptions) {
    if (!BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
        return FormatSpec.NO_PARENT_ADDRESS;
    }
    final int rawAddress = dictBuffer.readUnsignedInt24();
    final int magnitude = rawAddress & FormatSpec.SINT24_MAX;
    return (0 != (rawAddress & FormatSpec.MSB24)) ? -magnitude : magnitude;
}
// Scratch array used by readCharGroup to collect the characters of a multi-character group.
// NOTE(review): being static, it is shared by every call — presumably callers are
// single-threaded; confirm.
private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH];
/**
 * Reads one character group from the current position of the buffer.
 *
 * The buffer is not repositioned here, so it must already point at the group.
 * originalGroupAddress is only used for address bookkeeping: addressPointer starts from it
 * and is advanced by the size of every field read, so that the relative addresses stored in
 * the file (children, bigrams) can be resolved and the end address of the group returned.
 *
 * @param dictBuffer the buffer, positioned at the start of a character group.
 * @param originalGroupAddress the address of the group, in the same address space as the
 *        returned children and bigram addresses.
 * @param options file format options.
 * @return the decoded group; bigrams are returned as pending (address, frequency) pairs.
 * @throws RuntimeException if a bigram entry has no address type.
 */
public static CharGroupInfo readCharGroup(final DictBuffer dictBuffer,
final int originalGroupAddress, final FormatOptions options) {
int addressPointer = originalGroupAddress;
final int flags = dictBuffer.readUnsignedByte();
++addressPointer;
final int parentAddress = readParentAddress(dictBuffer, options);
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
// readParentAddress consumed a three-byte field.
addressPointer += 3;
}
final int characters[];
if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
int index = 0;
int character = CharEncoding.readChar(dictBuffer);
addressPointer += CharEncoding.getCharSize(character);
// readChar returns -1 for the terminator (presumably FormatSpec.INVALID_CHARACTER).
while (-1 != character) {
// FusionDictionary is making sure that the length of the word is smaller than
// MAX_WORD_LENGTH.
// So we'll never write past the end of CHARACTER_BUFFER.
CHARACTER_BUFFER[index++] = character;
character = CharEncoding.readChar(dictBuffer);
addressPointer += CharEncoding.getCharSize(character);
}
characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index);
} else {
// Single-character group: no terminator follows.
final int character = CharEncoding.readChar(dictBuffer);
addressPointer += CharEncoding.getCharSize(character);
characters = new int[] { character };
}
final int frequency;
if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
++addressPointer;
frequency = dictBuffer.readUnsignedByte();
} else {
frequency = CharGroup.NOT_A_TERMINAL;
}
int childrenAddress = readChildrenAddress(dictBuffer, flags, options);
if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
// The stored value is relative to the address of the children address field itself.
childrenAddress += addressPointer;
}
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
ArrayList<WeightedString> shortcutTargets = null;
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
final int pointerBefore = dictBuffer.position();
shortcutTargets = new ArrayList<WeightedString>();
dictBuffer.readUnsignedShort(); // Skip the size
while (true) {
final int targetFlags = dictBuffer.readUnsignedByte();
final String word = CharEncoding.readString(dictBuffer);
shortcutTargets.add(new WeightedString(word,
targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY));
if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
}
// Account for the whole shortcut list at once, using the buffer position delta.
addressPointer += dictBuffer.position() - pointerBefore;
}
ArrayList<PendingAttribute> bigrams = null;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>();
int bigramCount = 0;
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
final int bigramFlags = dictBuffer.readUnsignedByte();
++addressPointer;
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE)
? 1 : -1;
// The stored offset is relative to the address just after the bigram flags byte.
int bigramAddress = addressPointer;
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
bigramAddress += sign * dictBuffer.readUnsignedByte();
addressPointer += 1;
break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
bigramAddress += sign * dictBuffer.readUnsignedShort();
addressPointer += 2;
break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
final int offset = (dictBuffer.readUnsignedByte() << 16)
+ dictBuffer.readUnsignedShort();
bigramAddress += sign * offset;
addressPointer += 3;
break;
default:
throw new RuntimeException("Has bigrams with no address");
}
bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY,
bigramAddress));
if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
}
// Only logs; reading simply stops once the counter has reached the limit.
if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
MakedictLog.d("too many bigrams in a group.");
}
}
return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
parentAddress, childrenAddress, shortcutTargets, bigrams);
}
/**
 * Reads the char group count from the buffer and moves the pointer past it.
 *
 * The count takes one byte when that byte does not exceed
 * FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT, and two bytes otherwise.
 *
 * @param dictBuffer the buffer, positioned on a char group count.
 * @return the number of char groups.
 */
public static int readCharGroupCount(final DictBuffer dictBuffer) {
    final int firstByte = dictBuffer.readUnsignedByte();
    if (firstByte > FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT) {
        final int highBits = FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & firstByte;
        return (highBits << 8) + dictBuffer.readUnsignedByte();
    }
    return firstByte;
}
/**
 * Looks up the word stored at the given address and returns it as a string.
 *
 * The buffer position is saved on entry and put back before returning normally.
 *
 * @param dictBuffer the buffer to read from.
 * @param headerSize the size of the header.
 * @param address the address to seek.
 * @param formatOptions file format options.
 * @return the word with its frequency, as a weighted string.
 */
/* package for tests */ static WeightedString getWordAtAddress(
        final DictBuffer dictBuffer, final int headerSize, final int address,
        final FormatOptions formatOptions) {
    final int savedPosition = dictBuffer.position();
    dictBuffer.position(address);
    // Formats with parent addresses are walked leaf-to-root, others root-to-leaf.
    final WeightedString word = BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)
            ? getWordAtAddressWithParentAddress(dictBuffer, headerSize, address,
                    formatOptions)
            : getWordAtAddressWithoutParentAddress(dictBuffer, headerSize, address,
                    formatOptions);
    dictBuffer.position(savedPosition);
    return word;
}
/**
 * Rebuilds the word ending at the given address by following parent addresses upwards.
 *
 * Each step reads a group, follows moved groups to their new location, prepends the
 * group's characters to the word and moves on to the parent. The frequency returned is
 * that of the first (deepest) group read.
 *
 * @param dictBuffer the buffer to read from.
 * @param headerSize the size of the header.
 * @param address the address of the last group of the word.
 * @param options file format options.
 * @return the word with its frequency, as a weighted string.
 */
@SuppressWarnings("unused")
private static WeightedString getWordAtAddressWithParentAddress(
        final DictBuffer dictBuffer, final int headerSize, final int address,
        final FormatOptions options) {
    final StringBuilder wordBuilder = new StringBuilder();
    int frequency = Integer.MIN_VALUE;
    int currentAddress = address;
    // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
    for (int remaining = FormatSpec.MAX_WORD_LENGTH; remaining > 0; --remaining) {
        CharGroupInfo groupInfo;
        int jumpCount = 0;
        while (true) {
            dictBuffer.position(currentAddress + headerSize);
            groupInfo = readCharGroup(dictBuffer, currentAddress, options);
            if (BinaryDictIOUtils.isMovedGroup(groupInfo.mFlags, options)) {
                // A moved group stores where it went in its parent address field.
                currentAddress = groupInfo.mParentAddress + groupInfo.mOriginalAddress;
            }
            if (DBG && jumpCount++ > MAX_JUMPS) {
                MakedictLog.d("Too many jumps - probably a bug");
            }
            if (!BinaryDictIOUtils.isMovedGroup(groupInfo.mFlags, options)) {
                break;
            }
        }
        if (Integer.MIN_VALUE == frequency) {
            frequency = groupInfo.mFrequency;
        }
        wordBuilder.insert(0,
                new String(groupInfo.mCharacters, 0, groupInfo.mCharacters.length));
        if (FormatSpec.NO_PARENT_ADDRESS == groupInfo.mParentAddress) {
            break;
        }
        currentAddress = groupInfo.mParentAddress + groupInfo.mOriginalAddress;
    }
    return new WeightedString(wordBuilder.toString(), frequency);
}
/**
 * Rebuilds the word ending at the given address by walking down from the root node array.
 *
 * Groups of the current node array are read in order. The last group seen whose children
 * address does not exceed the target is remembered in `last`; when a group with a larger
 * children address is met, or the array is exhausted, the search descends into the children
 * of `last` and its characters are appended to the word.
 *
 * @param dictBuffer the buffer to read from; its position is changed by this method.
 * @param headerSize the size of the header.
 * @param address the address of the last group of the word.
 * @param options file format options.
 * @return the word with its frequency, or null if no group was found at the address.
 */
private static WeightedString getWordAtAddressWithoutParentAddress(
final DictBuffer dictBuffer, final int headerSize, final int address,
final FormatOptions options) {
dictBuffer.position(headerSize);
final int count = readCharGroupCount(dictBuffer);
int groupOffset = BinaryDictIOUtils.getGroupCountSize(count);
final StringBuilder builder = new StringBuilder();
WeightedString result = null;
CharGroupInfo last = null;
// i counts the groups left in the current node array; it is reset on every descent.
for (int i = count - 1; i >= 0; --i) {
CharGroupInfo info = readCharGroup(dictBuffer, groupOffset, options);
groupOffset = info.mEndAddress;
if (info.mOriginalAddress == address) {
builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
result = new WeightedString(builder.toString(), info.mFrequency);
break; // and return
}
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
if (info.mChildrenAddress > address) {
if (null == last) continue;
// The target lies below `last`: descend into its children.
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
dictBuffer.position(last.mChildrenAddress + headerSize);
i = readCharGroupCount(dictBuffer);
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
last = null;
continue;
}
last = info;
}
// End of the array reached without a match: descend into the children of `last`.
// NOTE(review): `last` is dereferenced without a null check here; it is still null if no
// group with a children address was recorded in this array — confirm this cannot happen.
if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
dictBuffer.position(last.mChildrenAddress + headerSize);
i = readCharGroupCount(dictBuffer);
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
last = null;
continue;
}
}
return result;
}
/**
 * Reads a single node array from a buffer.
 *
 * This method reads the file at the current position. A node array is fully expected to
 * start at the current position.
 * This will recursively read other node arrays into the structure, recording each node
 * array it builds in reverseNodeArrayMap and reusing entries already present there.
 * When the format supports dynamic updates, the forward link that follows the array is
 * read and followed, and the groups of the linked arrays are added to the same node array.
 *
 * @param dictBuffer the buffer, correctly positioned at the start of a node array.
 * @param headerSize the size, in bytes, of the file header.
 * @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
 * @param reverseGroupMap a mapping from addresses to already read character groups. It is
 *        only passed on to recursive calls; this method neither reads nor updates it.
 * @param options file format options.
 * @return the read node array with all its children already read.
 * @throws IOException declared for callers; not thrown directly by this method.
 */
private static PtNodeArray readNodeArray(final DictBuffer dictBuffer,
final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
throws IOException {
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
// Addresses used below are relative to the end of the header.
final int nodeArrayOrigin = dictBuffer.position() - headerSize;
do { // Scan the linked-list node.
final int nodeArrayHeadPosition = dictBuffer.position() - headerSize;
final int count = readCharGroupCount(dictBuffer);
int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
CharGroupInfo info = readCharGroup(dictBuffer, groupOffset, options);
// NOTE(review): skipping a moved group here leaves groupOffset unchanged for the next
// group — confirm this is intended.
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
ArrayList<WeightedString> bigrams = null;
if (null != info.mBigrams) {
bigrams = new ArrayList<WeightedString>();
// Resolve each pending bigram address into the actual word and its frequency.
for (PendingAttribute bigram : info.mBigrams) {
final WeightedString word = getWordAtAddress(
dictBuffer, headerSize, bigram.mAddress, options);
final int reconstructedFrequency =
BinaryDictIOUtils.reconstructBigramFrequency(word.mFrequency,
bigram.mFrequency);
bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
}
}
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
if (null == children) {
// Not read yet: read the children recursively, then come back to where we were.
final int currentPosition = dictBuffer.position();
dictBuffer.position(info.mChildrenAddress + headerSize);
children = readNodeArray(dictBuffer, headerSize, reverseNodeArrayMap,
reverseGroupMap, options);
dictBuffer.position(currentPosition);
}
nodeArrayContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
info.mFrequency,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
} else {
nodeArrayContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
info.mFrequency,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
}
groupOffset = info.mEndAddress;
}
// reach the end of the array.
if (options.mSupportsDynamicUpdate) {
// A three-byte forward link follows the array; follow it when it is within the buffer.
final int nextAddress = dictBuffer.readUnsignedInt24();
if (nextAddress >= 0 && nextAddress < dictBuffer.limit()) {
dictBuffer.position(nextAddress);
} else {
break;
}
}
} while (options.mSupportsDynamicUpdate &&
dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
return nodeArray;
}
/**
 * Reads the magic number and, when it matches, the binary format version that follows.
 *
 * @param dictBuffer the buffer, positioned at the start of the file.
 * @return the format version, or FormatSpec.NOT_A_VERSION_NUMBER if the magic number
 *         does not match.
 * @throws IOException
 */
private static int getFormatVersion(final DictBuffer dictBuffer)
        throws IOException {
    final int magic = dictBuffer.readInt();
    if (FormatSpec.MAGIC_NUMBER != magic) {
        return FormatSpec.NOT_A_VERSION_NUMBER;
    }
    return dictBuffer.readUnsignedShort();
}
/**
 * Reads the binary format version and checks that it is within the supported range.
 *
 * @param dictBuffer the buffer, positioned at the start of the file.
 * @return the format version, guaranteed to be between
 *         FormatSpec.MINIMUM_SUPPORTED_VERSION and FormatSpec.MAXIMUM_SUPPORTED_VERSION.
 * @throws UnsupportedFormatException if the version read is outside the supported range,
 *         which includes the case where the magic number did not match.
 * @throws IOException
 */
static int checkFormatVersion(final DictBuffer dictBuffer)
        throws IOException, UnsupportedFormatException {
    final int version = getFormatVersion(dictBuffer);
    if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
            || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
        // The version can be rejected for being too low as well as too high, so the
        // message states the whole supported range.
        throw new UnsupportedFormatException("This file has version " + version
                + ", but this implementation only supports versions "
                + FormatSpec.MINIMUM_SUPPORTED_VERSION + " to "
                + FormatSpec.MAXIMUM_SUPPORTED_VERSION);
    }
    return version;
}
/**
 * Reads a file header through a header reader.
 *
 * The version, option flags, header size and attributes are read in that order.
 *
 * @param headerReader the header reader
 * @return the header, holding its size, the dictionary options and the format options.
 * @throws IOException
 * @throws UnsupportedFormatException if the version is rejected by the reader or the
 *         header size is negative.
 */
public static FileHeader readHeader(final HeaderReader headerReader)
        throws IOException, UnsupportedFormatException {
    final int version = headerReader.readVersion();
    final int optionsFlags = headerReader.readOptionFlags();
    final int headerSize = headerReader.readHeaderSize();
    if (0 > headerSize) {
        throw new UnsupportedFormatException("header size can't be negative.");
    }
    final HashMap<String, String> attributes = headerReader.readAttributes(headerSize);

    // Decode the individual option bits before building the option objects.
    final boolean germanUmlautProcessing =
            0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG);
    final boolean frenchLigatureProcessing =
            0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG);
    final boolean supportsDynamicUpdate =
            0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE);

    final FusionDictionary.DictionaryOptions dictionaryOptions =
            new FusionDictionary.DictionaryOptions(attributes, germanUmlautProcessing,
                    frenchLigatureProcessing);
    final FormatOptions formatOptions = new FormatOptions(version, supportsDynamicUpdate);
    return new FileHeader(headerSize, dictionaryOptions, formatOptions);
}
/**
 * Fills a map with the key/value option strings found in a buffer.
 *
 * Reading starts at the current position of the buffer, so the caller is responsible for
 * positioning it first. Pairs are read for as long as the position is before headerSize.
 *
 * @param dictBuffer the buffer, positioned on the first option.
 * @param headerSize the buffer position at which the options end.
 * @param options the map to populate.
 */
public static void populateOptions(final DictBuffer dictBuffer,
        final int headerSize, final HashMap<String, String> options) {
    while (headerSize > dictBuffer.position()) {
        // Arguments are evaluated left to right: the key is read before the value.
        options.put(CharEncoding.readString(dictBuffer), CharEncoding.readString(dictBuffer));
    }
}
/**
 * Reads a buffer and returns the memory representation of the dictionary.
 *
 * This high-level method takes a dict decoder and reads the contents of its buffer,
 * populating a FusionDictionary structure. If the decoder has no buffer yet, one is opened
 * with a read-only byte buffer factory. The optional dict argument is an existing
 * dictionary whose words and bigrams are added to the dictionary read from the buffer.
 *
 * @param dictDecoder the dict decoder.
 * @param dict an optional dictionary to add words from, or null.
 * @return the created (or merged) dictionary.
 * @throws FileNotFoundException if the dictionary file cannot be found.
 * @throws IOException if the buffer cannot be opened or read.
 * @throws UnsupportedFormatException if the format of the file is not recognized.
 */
@UsedForTesting
public static FusionDictionary readDictionaryBinary(final BinaryDictDecoder dictDecoder,
        final FusionDictionary dict) throws FileNotFoundException, IOException,
        UnsupportedFormatException {
    // if the buffer has not been opened, open the buffer with bytebuffer.
    if (dictDecoder.getDictBuffer() == null) {
        dictDecoder.openDictBuffer(
                new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
    }
    if (dictDecoder.getDictBuffer() == null) {
        MakedictLog.e("Cannot open the buffer");
        // Reading the header from a null buffer would fail with a NullPointerException;
        // report the failure as an I/O error instead.
        throw new IOException("Cannot open the buffer");
    }

    // Read header
    final FileHeader fileHeader = readHeader(dictDecoder);

    final Map<Integer, PtNodeArray> reverseNodeArrayMapping =
            new TreeMap<Integer, PtNodeArray>();
    final Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
    final PtNodeArray root = readNodeArray(dictDecoder.getDictBuffer(), fileHeader.mHeaderSize,
            reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions);

    final FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
    if (null != dict) {
        // First pass: add every word, so that the bigrams added below have their targets.
        for (final Word w : dict) {
            if (w.mIsBlacklistEntry) {
                newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
            } else {
                newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
            }
        }
        for (final Word w : dict) {
            // By construction a binary dictionary may not have bigrams pointing to
            // words that are not also registered as unigrams so we don't have to avoid
            // them explicitly here.
            for (final WeightedString bigram : w.mBigrams) {
                newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency);
            }
        }
    }

    return newDict;
}
/**
 * Convenience overload of isBinaryDictionary that takes a file name rather than a File.
 *
 * @param filename the path of the file to test.
 * @return the result of isBinaryDictionary for the File built from that path.
 */
public static boolean isBinaryDictionary(final String filename) {
    return isBinaryDictionary(new File(filename));
}
/**
 * Basic test to find out whether the file is a binary dictionary or not.
 *
 * Concretely this checks the magic number and that the format version which follows it is
 * within the supported range.
 *
 * @param file The file to test.
 * @return true if it's a binary dictionary with a supported version, false otherwise,
 *         including when the file cannot be read or is too short to hold the magic number
 *         and version.
 */
public static boolean isBinaryDictionary(final File file) {
    FileInputStream inStream = null;
    try {
        inStream = new FileInputStream(file);
        final ByteBuffer buffer = inStream.getChannel().map(
                FileChannel.MapMode.READ_ONLY, 0, file.length());
        final int version = getFormatVersion(new ByteBufferDictBuffer(buffer));
        return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION
                && version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION);
    } catch (FileNotFoundException e) {
        return false;
    } catch (IOException e) {
        return false;
    } catch (java.nio.BufferUnderflowException e) {
        // The file is shorter than the magic number plus version, so it cannot be a
        // dictionary. Without this the unchecked exception would escape to the caller.
        return false;
    } finally {
        if (inStream != null) {
            try {
                inStream.close();
            } catch (IOException e) {
                // do nothing
            }
        }
    }
}
}

View file

@ -16,7 +16,7 @@
package com.android.inputmethod.latin.makedict; package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;

View file

@ -18,13 +18,13 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.ByteArrayWrapper; import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
import java.io.File; import java.io.File;
import java.io.FileInputStream; import java.io.FileInputStream;
@ -62,7 +62,7 @@ public final class BinaryDictIOUtils {
* Retrieves all node arrays without recursive call. * Retrieves all node arrays without recursive call.
*/ */
private static void readUnigramsAndBigramsBinaryInner( private static void readUnigramsAndBigramsBinaryInner(
final FusionDictionaryBufferInterface buffer, final int headerSize, final DictBuffer dictBuffer, final int headerSize,
final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
final Map<Integer, ArrayList<PendingAttribute>> bigrams, final Map<Integer, ArrayList<PendingAttribute>> bigrams,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
@ -82,11 +82,11 @@ public final class BinaryDictIOUtils {
p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength); p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength);
} }
if (buffer.position() != p.mAddress) buffer.position(p.mAddress); if (dictBuffer.position() != p.mAddress) dictBuffer.position(p.mAddress);
if (index != p.mLength) index = p.mLength; if (index != p.mLength) index = p.mLength;
if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) { if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) {
p.mNumOfCharGroup = BinaryDictDecoder.readCharGroupCount(buffer); p.mNumOfCharGroup = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
p.mAddress += getGroupCountSize(p.mNumOfCharGroup); p.mAddress += getGroupCountSize(p.mNumOfCharGroup);
p.mPosition = 0; p.mPosition = 0;
} }
@ -94,7 +94,7 @@ public final class BinaryDictIOUtils {
stack.pop(); stack.pop();
continue; continue;
} }
CharGroupInfo info = BinaryDictDecoder.readCharGroup(buffer, CharGroupInfo info = BinaryDictDecoderUtils.readCharGroup(dictBuffer,
p.mAddress - headerSize, formatOptions); p.mAddress - headerSize, formatOptions);
for (int i = 0; i < info.mCharacters.length; ++i) { for (int i = 0; i < info.mCharacters.length; ++i) {
pushedChars[index++] = info.mCharacters[i]; pushedChars[index++] = info.mCharacters[i];
@ -114,7 +114,7 @@ public final class BinaryDictIOUtils {
if (p.mPosition == p.mNumOfCharGroup) { if (p.mPosition == p.mNumOfCharGroup) {
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
final int forwardLinkAddress = buffer.readUnsignedInt24(); final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) { if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
// The node array has a forward link. // The node array has a forward link.
p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT; p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT;
@ -127,7 +127,7 @@ public final class BinaryDictIOUtils {
} }
} else { } else {
// The node array has more groups. // The node array has more groups.
p.mAddress = buffer.position(); p.mAddress = dictBuffer.position();
} }
if (!isMovedGroup && hasChildrenAddress(info.mChildrenAddress)) { if (!isMovedGroup && hasChildrenAddress(info.mChildrenAddress)) {
@ -141,20 +141,20 @@ public final class BinaryDictIOUtils {
* Reads unigrams and bigrams from the binary file. * Reads unigrams and bigrams from the binary file.
* Doesn't store a full memory representation of the dictionary. * Doesn't store a full memory representation of the dictionary.
* *
* @param dictReader the dict reader. * @param dictDecoder the dict decoder.
* @param words the map to store the address as a key and the word as a value. * @param words the map to store the address as a key and the word as a value.
* @param frequencies the map to store the address as a key and the frequency as a value. * @param frequencies the map to store the address as a key and the frequency as a value.
* @param bigrams the map to store the address as a key and the list of address as a value. * @param bigrams the map to store the address as a key and the list of address as a value.
* @throws IOException if the file can't be read. * @throws IOException if the file can't be read.
* @throws UnsupportedFormatException if the format of the file is not recognized. * @throws UnsupportedFormatException if the format of the file is not recognized.
*/ */
public static void readUnigramsAndBigramsBinary(final BinaryDictReader dictReader, public static void readUnigramsAndBigramsBinary(final BinaryDictDecoder dictDecoder,
final Map<Integer, String> words, final Map<Integer, Integer> frequencies, final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException, final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
UnsupportedFormatException { UnsupportedFormatException {
// Read header // Read header
final FileHeader header = BinaryDictDecoder.readHeader(dictReader); final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
readUnigramsAndBigramsBinaryInner(dictReader.getBuffer(), header.mHeaderSize, words, readUnigramsAndBigramsBinaryInner(dictDecoder.getDictBuffer(), header.mHeaderSize, words,
frequencies, bigrams, header.mFormatOptions); frequencies, bigrams, header.mFormatOptions);
} }
@ -162,32 +162,32 @@ public final class BinaryDictIOUtils {
* Gets the address of the last CharGroup of the exact matching word in the dictionary. * Gets the address of the last CharGroup of the exact matching word in the dictionary.
* If no match is found, returns NOT_VALID_WORD. * If no match is found, returns NOT_VALID_WORD.
* *
* @param dictReader the dict reader. * @param dictDecoder the dict decoder.
* @param word the word we search for. * @param word the word we search for.
* @return the address of the terminal node. * @return the address of the terminal node.
* @throws IOException if the file can't be read. * @throws IOException if the file can't be read.
* @throws UnsupportedFormatException if the format of the file is not recognized. * @throws UnsupportedFormatException if the format of the file is not recognized.
*/ */
@UsedForTesting @UsedForTesting
public static int getTerminalPosition(final BinaryDictReader dictReader, public static int getTerminalPosition(final BinaryDictDecoder dictDecoder,
final String word) throws IOException, UnsupportedFormatException { final String word) throws IOException, UnsupportedFormatException {
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
if (word == null) return FormatSpec.NOT_VALID_WORD; if (word == null) return FormatSpec.NOT_VALID_WORD;
if (buffer.position() != 0) buffer.position(0); if (dictBuffer.position() != 0) dictBuffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(dictReader); final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
int wordPos = 0; int wordPos = 0;
final int wordLen = word.codePointCount(0, word.length()); final int wordLen = word.codePointCount(0, word.length());
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) { for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD; if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
do { do {
final int charGroupCount = BinaryDictDecoder.readCharGroupCount(buffer); final int charGroupCount = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
boolean foundNextCharGroup = false; boolean foundNextCharGroup = false;
for (int i = 0; i < charGroupCount; ++i) { for (int i = 0; i < charGroupCount; ++i) {
final int charGroupPos = buffer.position(); final int charGroupPos = dictBuffer.position();
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer, final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(
buffer.position(), header.mFormatOptions); dictBuffer, dictBuffer.position(), header.mFormatOptions);
final boolean isMovedGroup = isMovedGroup(currentInfo.mFlags, final boolean isMovedGroup = isMovedGroup(currentInfo.mFlags,
header.mFormatOptions); header.mFormatOptions);
final boolean isDeletedGroup = isDeletedGroup(currentInfo.mFlags, final boolean isDeletedGroup = isDeletedGroup(currentInfo.mFlags,
@ -219,7 +219,7 @@ public final class BinaryDictIOUtils {
return FormatSpec.NOT_VALID_WORD; return FormatSpec.NOT_VALID_WORD;
} }
foundNextCharGroup = true; foundNextCharGroup = true;
buffer.position(currentInfo.mChildrenAddress); dictBuffer.position(currentInfo.mChildrenAddress);
break; break;
} }
} }
@ -233,11 +233,11 @@ public final class BinaryDictIOUtils {
return FormatSpec.NOT_VALID_WORD; return FormatSpec.NOT_VALID_WORD;
} }
final int forwardLinkAddress = buffer.readUnsignedInt24(); final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) { if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
return FormatSpec.NOT_VALID_WORD; return FormatSpec.NOT_VALID_WORD;
} }
buffer.position(forwardLinkAddress); dictBuffer.position(forwardLinkAddress);
} while(true); } while(true);
} }
return FormatSpec.NOT_VALID_WORD; return FormatSpec.NOT_VALID_WORD;
@ -246,12 +246,12 @@ public final class BinaryDictIOUtils {
/** /**
* @return the size written, in bytes. Always 3 bytes. * @return the size written, in bytes. Always 3 bytes.
*/ */
static int writeSInt24ToBuffer(final FusionDictionaryBufferInterface buffer, static int writeSInt24ToBuffer(final DictBuffer dictBuffer,
final int value) { final int value) {
final int absValue = Math.abs(value); final int absValue = Math.abs(value);
buffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF)); dictBuffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
buffer.put((byte)((absValue >> 8) & 0xFF)); dictBuffer.put((byte)((absValue >> 8) & 0xFF));
buffer.put((byte)(absValue & 0xFF)); dictBuffer.put((byte)(absValue & 0xFF));
return 3; return 3;
} }
@ -289,31 +289,31 @@ public final class BinaryDictIOUtils {
return BinaryDictEncoder.getByteSize(value); return BinaryDictEncoder.getByteSize(value);
} }
static void skipCharGroup(final FusionDictionaryBufferInterface buffer, static void skipCharGroup(final DictBuffer dictBuffer,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
final int flags = buffer.readUnsignedByte(); final int flags = dictBuffer.readUnsignedByte();
BinaryDictDecoder.readParentAddress(buffer, formatOptions); BinaryDictDecoderUtils.readParentAddress(dictBuffer, formatOptions);
skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
BinaryDictDecoder.readChildrenAddress(buffer, flags, formatOptions); BinaryDictDecoderUtils.readChildrenAddress(dictBuffer, flags, formatOptions);
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte(); if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) dictBuffer.readUnsignedByte();
if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) { if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) {
final int shortcutsSize = buffer.readUnsignedShort(); final int shortcutsSize = dictBuffer.readUnsignedShort();
buffer.position(buffer.position() + shortcutsSize dictBuffer.position(dictBuffer.position() + shortcutsSize
- FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE); - FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE);
} }
if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) { if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) {
int bigramCount = 0; int bigramCount = 0;
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) { while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
final int bigramFlags = buffer.readUnsignedByte(); final int bigramFlags = dictBuffer.readUnsignedByte();
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) { switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE: case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
buffer.readUnsignedByte(); dictBuffer.readUnsignedByte();
break; break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES: case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
buffer.readUnsignedShort(); dictBuffer.readUnsignedShort();
break; break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES: case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
buffer.readUnsignedInt24(); dictBuffer.readUnsignedInt24();
break; break;
} }
if ((bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT) == 0) break; if ((bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT) == 0) break;
@ -324,15 +324,15 @@ public final class BinaryDictIOUtils {
} }
} }
static void skipString(final FusionDictionaryBufferInterface buffer, static void skipString(final DictBuffer dictBuffer,
final boolean hasMultipleChars) { final boolean hasMultipleChars) {
if (hasMultipleChars) { if (hasMultipleChars) {
int character = CharEncoding.readChar(buffer); int character = CharEncoding.readChar(dictBuffer);
while (character != FormatSpec.INVALID_CHARACTER) { while (character != FormatSpec.INVALID_CHARACTER) {
character = CharEncoding.readChar(buffer); character = CharEncoding.readChar(dictBuffer);
} }
} else { } else {
CharEncoding.readChar(buffer); CharEncoding.readChar(dictBuffer);
} }
} }
@ -508,24 +508,25 @@ public final class BinaryDictIOUtils {
} }
/** /**
* Find a word using the BinaryDictReader. * Find a word using the BinaryDictDecoder.
* *
* @param dictReader the dict reader * @param dictDecoder the dict reader
* @param word the word searched * @param word the word searched
* @return the found group * @return the found group
* @throws IOException * @throws IOException
* @throws UnsupportedFormatException * @throws UnsupportedFormatException
*/ */
@UsedForTesting @UsedForTesting
public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader dictReader, public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictDecoder dictDecoder,
final String word) throws IOException, UnsupportedFormatException { final String word) throws IOException, UnsupportedFormatException {
int position = getTerminalPosition(dictReader, word); int position = getTerminalPosition(dictDecoder, word);
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
if (position != FormatSpec.NOT_VALID_WORD) { if (position != FormatSpec.NOT_VALID_WORD) {
buffer.position(0); dictBuffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(dictReader); final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
buffer.position(position); dictBuffer.position(position);
return BinaryDictDecoder.readCharGroup(buffer, position, header.mFormatOptions); return BinaryDictDecoderUtils.readCharGroup(dictBuffer, position,
header.mFormatOptions);
} }
return null; return null;
} }
@ -544,21 +545,21 @@ public final class BinaryDictIOUtils {
final File file, final long offset, final long length) final File file, final long offset, final long length)
throws FileNotFoundException, IOException, UnsupportedFormatException { throws FileNotFoundException, IOException, UnsupportedFormatException {
final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE]; final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE];
final BinaryDictReader dictReader = new BinaryDictReader(file); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
dictReader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFactory() { dictDecoder.openDictBuffer(new BinaryDictDecoder.DictionaryBufferFactory() {
@Override @Override
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(File file) public DictBuffer getDictionaryBuffer(File file)
throws FileNotFoundException, IOException { throws FileNotFoundException, IOException {
final FileInputStream inStream = new FileInputStream(file); final FileInputStream inStream = new FileInputStream(file);
try { try {
inStream.read(buffer); inStream.read(buffer);
return new ByteArrayWrapper(buffer); return new ByteArrayDictBuffer(buffer);
} finally { } finally {
inStream.close(); inStream.close();
} }
} }
}); });
return BinaryDictDecoder.readHeader(dictReader); return BinaryDictDecoderUtils.readHeader(dictDecoder);
} }
public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset, public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset,
@ -636,4 +637,19 @@ public final class BinaryDictIOUtils {
return 0; return 0;
} }
} }
/**
 * Calculate bigram frequency from compressed value.
 *
 * The span between the unigram frequency and FormatSpec.MAX_TERMINAL_FREQUENCY is divided
 * into (1.5 + FormatSpec.MAX_BIGRAM_FREQUENCY) steps; the result is the unigram frequency
 * plus (bigramFrequency + 1) of those steps, truncated to an int.
 *
 * @param unigramFrequency the frequency the reconstruction starts from.
 * @param bigramFrequency compressed frequency
 * @return approximate bigram frequency
 */
public static int reconstructBigramFrequency(final int unigramFrequency,
        final int bigramFrequency) {
    final float divisor = 1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY;
    final float step = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency) / divisor;
    final float stepCount = bigramFrequency + 1.0f;
    return (int)(unigramFrequency + step * stepCount);
}
} }

View file

@ -1,169 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.HashMap;
/**
 * Pairs a dictionary file with the buffer opened over it and implements
 * {@link HeaderReaderInterface} by reading from that buffer.
 *
 * The buffer stays null until {@link #openBuffer} has been called; the header-reading
 * methods use it without a null check.
 */
public class BinaryDictReader implements HeaderReaderInterface {

    /**
     * Builds the buffer through which a dictionary file is read.
     */
    public interface FusionDictionaryBufferFactory {
        public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
                throws FileNotFoundException, IOException;
    }

    /**
     * Creates FusionDictionaryBuffer from a ByteBuffer
     */
    public static final class FusionDictionaryBufferFromByteBufferFactory
            implements FusionDictionaryBufferFactory {
        /**
         * Maps the whole file read-only and wraps the mapping.
         *
         * @param file the dictionary file.
         * @return a buffer over the mapped file.
         */
        @Override
        public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
                throws FileNotFoundException, IOException {
            FileInputStream inStream = null;
            ByteBuffer buffer = null;
            try {
                inStream = new FileInputStream(file);
                buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY,
                        0, file.length());
            } finally {
                // The stream is closed whether or not the mapping succeeded.
                if (inStream != null) {
                    inStream.close();
                }
            }
            if (buffer != null) {
                return new BinaryDictDecoder.ByteBufferWrapper(buffer);
            }
            return null;
        }
    }

    /**
     * Creates FusionDictionaryBuffer from a byte array
     */
    public static final class FusionDictionaryBufferFromByteArrayFactory
            implements FusionDictionaryBufferFactory {
        /**
         * Copies the file into a byte array sized from file.length() and wraps it.
         *
         * @param file the dictionary file.
         * @return a buffer over the in-memory copy.
         */
        @Override
        public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
                throws FileNotFoundException, IOException {
            FileInputStream inStream = null;
            try {
                inStream = new FileInputStream(file);
                final byte[] array = new byte[(int) file.length()];
                // A single read() may return fewer bytes than requested, so keep reading
                // until the array is full or the stream ends.
                int filled = 0;
                while (filled < array.length) {
                    final int count = inStream.read(array, filled, array.length - filled);
                    if (count < 0) {
                        // End of stream before file.length() bytes: the tail stays zeroed.
                        break;
                    }
                    filled += count;
                }
                return new ByteArrayWrapper(array);
            } finally {
                if (inStream != null) {
                    inStream.close();
                }
            }
        }
    }

    /**
     * Creates FusionDictionaryBuffer from a RandomAccessFile.
     */
    @UsedForTesting
    public static final class FusionDictionaryBufferFromWritableByteBufferFactory
            implements FusionDictionaryBufferFactory {
        /**
         * Opens the file in "rw" mode, maps all of it read-write and wraps the mapping.
         *
         * @param file the dictionary file.
         * @return a writable buffer over the mapped file.
         */
        @Override
        public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
                throws FileNotFoundException, IOException {
            RandomAccessFile raFile = null;
            ByteBuffer buffer = null;
            try {
                raFile = new RandomAccessFile(file, "rw");
                buffer = raFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0,
                        file.length());
            } finally {
                // The file is closed whether or not the mapping succeeded.
                if (raFile != null) {
                    raFile.close();
                }
            }
            if (buffer != null) {
                return new BinaryDictDecoder.ByteBufferWrapper(buffer);
            }
            return null;
        }
    }

    private final File mDictionaryBinaryFile;
    private FusionDictionaryBufferInterface mFusionDictionaryBuffer;

    /**
     * @param file the dictionary file; no buffer is opened yet.
     */
    public BinaryDictReader(final File file) {
        mDictionaryBinaryFile = file;
        mFusionDictionaryBuffer = null;
    }

    /**
     * Opens (or re-opens) the buffer over the dictionary file with the given factory.
     *
     * @param factory the factory that builds the buffer.
     */
    public void openBuffer(final FusionDictionaryBufferFactory factory)
            throws FileNotFoundException, IOException {
        mFusionDictionaryBuffer = factory.getFusionDictionaryBuffer(mDictionaryBinaryFile);
    }

    /**
     * @return the buffer set by the last openBuffer call, or null if none was made.
     */
    public FusionDictionaryBufferInterface getBuffer() {
        return mFusionDictionaryBuffer;
    }

    /**
     * Convenience for openBuffer followed by getBuffer.
     *
     * @param factory the factory that builds the buffer.
     * @return the freshly opened buffer.
     */
    @UsedForTesting
    public FusionDictionaryBufferInterface openAndGetBuffer(
            final FusionDictionaryBufferFactory factory)
            throws FileNotFoundException, IOException {
        openBuffer(factory);
        return getBuffer();
    }

    // The implementation of HeaderReaderInterface
    @Override
    public int readVersion() throws IOException, UnsupportedFormatException {
        return BinaryDictDecoder.checkFormatVersion(mFusionDictionaryBuffer);
    }

    @Override
    public int readOptionFlags() {
        return mFusionDictionaryBuffer.readUnsignedShort();
    }

    @Override
    public int readHeaderSize() {
        return mFusionDictionaryBuffer.readInt();
    }

    /**
     * Reads key/value string pairs from the current position until the buffer position
     * reaches headerSize, then sets the position to exactly headerSize.
     *
     * @param headerSize the position at which the header ends.
     * @return the attributes read; a later pair with the same key replaces an earlier one.
     */
    @Override
    public HashMap<String, String> readAttributes(final int headerSize) {
        final HashMap<String, String> attributes = new HashMap<String, String>();
        while (mFusionDictionaryBuffer.position() < headerSize) {
            // We can avoid infinite loop here since mFusionDictionaryBuffer.position() is
            // always increased by calling CharEncoding.readString.
            final String key = CharEncoding.readString(mFusionDictionaryBuffer);
            final String value = CharEncoding.readString(mFusionDictionaryBuffer);
            attributes.put(key, value);
        }
        mFusionDictionaryBuffer.position(headerSize);
        return attributes;
    }
}

View file

@ -18,7 +18,7 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting; import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants; import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
@ -49,142 +49,146 @@ public final class DynamicBinaryDictIOUtils {
/** /**
* Delete the word from the binary file. * Delete the word from the binary file.
* *
* @param dictReader the dict reader. * @param dictDecoder the dict decoder.
* @param word the word we delete * @param word the word we delete
* @throws IOException * @throws IOException
* @throws UnsupportedFormatException * @throws UnsupportedFormatException
*/ */
@UsedForTesting @UsedForTesting
public static void deleteWord(final BinaryDictReader dictReader, final String word) public static void deleteWord(final BinaryDictDecoder dictDecoder, final String word)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
buffer.position(0); dictBuffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(dictReader); final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictReader, word); final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
if (wordPosition == FormatSpec.NOT_VALID_WORD) return; if (wordPosition == FormatSpec.NOT_VALID_WORD) return;
buffer.position(wordPosition); dictBuffer.position(wordPosition);
final int flags = buffer.readUnsignedByte(); final int flags = dictBuffer.readUnsignedByte();
buffer.position(wordPosition); dictBuffer.position(wordPosition);
buffer.put((byte)markAsDeleted(flags)); dictBuffer.put((byte)markAsDeleted(flags));
} }
/** /**
* Update a parent address in a CharGroup that is referred to by groupOriginAddress. * Update a parent address in a CharGroup that is referred to by groupOriginAddress.
* *
* @param buffer the buffer to write. * @param dictBuffer the DictBuffer to write.
* @param groupOriginAddress the address of the group. * @param groupOriginAddress the address of the group.
* @param newParentAddress the absolute address of the parent. * @param newParentAddress the absolute address of the parent.
* @param formatOptions file format options. * @param formatOptions file format options.
*/ */
public static void updateParentAddress(final FusionDictionaryBufferInterface buffer, public static void updateParentAddress(final DictBuffer dictBuffer,
final int groupOriginAddress, final int newParentAddress, final int groupOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
final int originalPosition = buffer.position(); final int originalPosition = dictBuffer.position();
buffer.position(groupOriginAddress); dictBuffer.position(groupOriginAddress);
if (!formatOptions.mSupportsDynamicUpdate) { if (!formatOptions.mSupportsDynamicUpdate) {
throw new RuntimeException("this file format does not support parent addresses"); throw new RuntimeException("this file format does not support parent addresses");
} }
final int flags = buffer.readUnsignedByte(); final int flags = dictBuffer.readUnsignedByte();
if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) { if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) {
// If the group is moved, the parent address is stored in the destination group. // If the group is moved, the parent address is stored in the destination group.
// We are guaranteed to process the destination group later, so there is no need to // We are guaranteed to process the destination group later, so there is no need to
// update anything here. // update anything here.
buffer.position(originalPosition); dictBuffer.position(originalPosition);
return; return;
} }
if (DBG) { if (DBG) {
MakedictLog.d("update parent address flags=" + flags + ", " + groupOriginAddress); MakedictLog.d("update parent address flags=" + flags + ", " + groupOriginAddress);
} }
final int parentOffset = newParentAddress - groupOriginAddress; final int parentOffset = newParentAddress - groupOriginAddress;
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, parentOffset); BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, parentOffset);
buffer.position(originalPosition); dictBuffer.position(originalPosition);
} }
/** /**
* Update parent addresses in a node array stored at nodeOriginAddress. * Update parent addresses in a node array stored at nodeOriginAddress.
* *
* @param buffer the buffer to be modified. * @param dictBuffer the DictBuffer to be modified.
* @param nodeOriginAddress the address of the node array to update. * @param nodeOriginAddress the address of the node array to update.
* @param newParentAddress the address to be written. * @param newParentAddress the address to be written.
* @param formatOptions file format options. * @param formatOptions file format options.
*/ */
public static void updateParentAddresses(final FusionDictionaryBufferInterface buffer, public static void updateParentAddresses(final DictBuffer dictBuffer,
final int nodeOriginAddress, final int newParentAddress, final int nodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
final int originalPosition = buffer.position(); final int originalPosition = dictBuffer.position();
buffer.position(nodeOriginAddress); dictBuffer.position(nodeOriginAddress);
do { do {
final int count = BinaryDictDecoder.readCharGroupCount(buffer); final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
updateParentAddress(buffer, buffer.position(), newParentAddress, formatOptions); updateParentAddress(dictBuffer, dictBuffer.position(), newParentAddress,
BinaryDictIOUtils.skipCharGroup(buffer, formatOptions); formatOptions);
BinaryDictIOUtils.skipCharGroup(dictBuffer, formatOptions);
} }
final int forwardLinkAddress = buffer.readUnsignedInt24(); final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
buffer.position(forwardLinkAddress); dictBuffer.position(forwardLinkAddress);
} while (formatOptions.mSupportsDynamicUpdate } while (formatOptions.mSupportsDynamicUpdate
&& buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS); && dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
buffer.position(originalPosition); dictBuffer.position(originalPosition);
} }
/** /**
* Update a children address in a CharGroup that is addressed by groupOriginAddress. * Update a children address in a CharGroup that is addressed by groupOriginAddress.
* *
* @param buffer the buffer to write. * @param dictBuffer the DictBuffer to write.
* @param groupOriginAddress the address of the group. * @param groupOriginAddress the address of the group.
* @param newChildrenAddress the absolute address of the child. * @param newChildrenAddress the absolute address of the child.
* @param formatOptions file format options. * @param formatOptions file format options.
*/ */
public static void updateChildrenAddress(final FusionDictionaryBufferInterface buffer, public static void updateChildrenAddress(final DictBuffer dictBuffer,
final int groupOriginAddress, final int newChildrenAddress, final int groupOriginAddress, final int newChildrenAddress,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
final int originalPosition = buffer.position(); final int originalPosition = dictBuffer.position();
buffer.position(groupOriginAddress); dictBuffer.position(groupOriginAddress);
final int flags = buffer.readUnsignedByte(); final int flags = dictBuffer.readUnsignedByte();
final int parentAddress = BinaryDictDecoder.readParentAddress(buffer, formatOptions); final int parentAddress = BinaryDictDecoderUtils.readParentAddress(dictBuffer,
BinaryDictIOUtils.skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0); formatOptions);
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte(); BinaryDictIOUtils.skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) dictBuffer.readUnsignedByte();
final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS
? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - buffer.position(); ? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - dictBuffer.position();
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, childrenOffset); BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, childrenOffset);
buffer.position(originalPosition); dictBuffer.position(originalPosition);
} }
/** /**
* Helper method to move a char group to the tail of the file. * Helper method to move a char group to the tail of the file.
*/ */
private static int moveCharGroup(final OutputStream destination, private static int moveCharGroup(final OutputStream destination,
final FusionDictionaryBufferInterface buffer, final CharGroupInfo info, final DictBuffer dictBuffer, final CharGroupInfo info,
final int nodeArrayOriginAddress, final int oldGroupAddress, final int nodeArrayOriginAddress, final int oldGroupAddress,
final FormatOptions formatOptions) throws IOException { final FormatOptions formatOptions) throws IOException {
updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions); updateParentAddress(dictBuffer, oldGroupAddress, dictBuffer.limit() + 1, formatOptions);
buffer.position(oldGroupAddress); dictBuffer.position(oldGroupAddress);
final int currentFlags = buffer.readUnsignedByte(); final int currentFlags = dictBuffer.readUnsignedByte();
buffer.position(oldGroupAddress); dictBuffer.position(oldGroupAddress);
buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags dictBuffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
& (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG)))); & (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
int size = FormatSpec.GROUP_FLAGS_SIZE; int size = FormatSpec.GROUP_FLAGS_SIZE;
updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions); updateForwardLink(dictBuffer, nodeArrayOriginAddress, dictBuffer.limit(), formatOptions);
size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info }); size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info });
return size; return size;
} }
@SuppressWarnings("unused") @SuppressWarnings("unused")
private static void updateForwardLink(final FusionDictionaryBufferInterface buffer, private static void updateForwardLink(final DictBuffer dictBuffer,
final int nodeArrayOriginAddress, final int newNodeArrayAddress, final int nodeArrayOriginAddress, final int newNodeArrayAddress,
final FormatOptions formatOptions) { final FormatOptions formatOptions) {
buffer.position(nodeArrayOriginAddress); dictBuffer.position(nodeArrayOriginAddress);
int jumpCount = 0; int jumpCount = 0;
while (jumpCount++ < MAX_JUMPS) { while (jumpCount++ < MAX_JUMPS) {
final int count = BinaryDictDecoder.readCharGroupCount(buffer); final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
for (int i = 0; i < count; ++i) BinaryDictIOUtils.skipCharGroup(buffer, formatOptions); for (int i = 0; i < count; ++i) {
final int forwardLinkAddress = buffer.readUnsignedInt24(); BinaryDictIOUtils.skipCharGroup(dictBuffer, formatOptions);
}
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) { if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE); dictBuffer.position(dictBuffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress); BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeArrayAddress);
return; return;
} }
buffer.position(forwardLinkAddress); dictBuffer.position(forwardLinkAddress);
} }
if (DBG && jumpCount >= MAX_JUMPS) { if (DBG && jumpCount >= MAX_JUMPS) {
throw new RuntimeException("too many jumps, probably a bug."); throw new RuntimeException("too many jumps, probably a bug.");
@ -204,7 +208,7 @@ public final class DynamicBinaryDictIOUtils {
* @param shortcutTargets the shortcut targets for this group. * @param shortcutTargets the shortcut targets for this group.
* @param bigrams the bigrams for this group. * @param bigrams the bigrams for this group.
* @param destination the stream representing the tail of the file. * @param destination the stream representing the tail of the file.
* @param buffer the buffer representing the (constant-size) body of the file. * @param dictBuffer the DictBuffer representing the (constant-size) body of the file.
* @param oldNodeArrayOrigin the origin of the old node array this group was a part of. * @param oldNodeArrayOrigin the origin of the old node array this group was a part of.
* @param oldGroupOrigin the old origin where this group used to be stored. * @param oldGroupOrigin the old origin where this group used to be stored.
* @param formatOptions format options for this dictionary. * @param formatOptions format options for this dictionary.
@ -215,7 +219,7 @@ public final class DynamicBinaryDictIOUtils {
final int length, final int flags, final int frequency, final int parentAddress, final int length, final int flags, final int frequency, final int parentAddress,
final ArrayList<WeightedString> shortcutTargets, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<PendingAttribute> bigrams, final OutputStream destination, final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin, final DictBuffer dictBuffer, final int oldNodeArrayOrigin,
final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException { final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException {
int size = 0; int size = 0;
final int newGroupOrigin = fileEndAddress + 1; final int newGroupOrigin = fileEndAddress + 1;
@ -228,7 +232,7 @@ public final class DynamicBinaryDictIOUtils {
flags, writtenCharacters, frequency, parentAddress, flags, writtenCharacters, frequency, parentAddress,
fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets, fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
bigrams); bigrams);
moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin, moveCharGroup(destination, dictBuffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin,
formatOptions); formatOptions);
return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE; return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
} }
@ -236,7 +240,7 @@ public final class DynamicBinaryDictIOUtils {
/** /**
* Insert a word into a binary dictionary. * Insert a word into a binary dictionary.
* *
* @param dictReader the dict reader. * @param dictDecoder the dict decoder.
* @param destination a stream to the underlying file, with the pointer at the end of the file. * @param destination a stream to the underlying file, with the pointer at the end of the file.
* @param word the word to insert. * @param word the word to insert.
* @param frequency the frequency of the new word. * @param frequency the frequency of the new word.
@ -249,16 +253,17 @@ public final class DynamicBinaryDictIOUtils {
// TODO: Support batch insertion. // TODO: Support batch insertion.
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary. // TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
@UsedForTesting @UsedForTesting
public static void insertWord(final BinaryDictReader dictReader, final OutputStream destination, public static void insertWord(final BinaryDictDecoder dictDecoder,
final String word, final int frequency, final ArrayList<WeightedString> bigramStrings, final OutputStream destination, final String word, final int frequency,
final ArrayList<WeightedString> bigramStrings,
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord, final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
final boolean isBlackListEntry) final boolean isBlackListEntry)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>(); final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>();
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
if (bigramStrings != null) { if (bigramStrings != null) {
for (final WeightedString bigram : bigramStrings) { for (final WeightedString bigram : bigramStrings) {
int position = BinaryDictIOUtils.getTerminalPosition(dictReader, bigram.mWord); int position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, bigram.mWord);
if (position == FormatSpec.NOT_VALID_WORD) { if (position == FormatSpec.NOT_VALID_WORD) {
// TODO: figure out what is the correct thing to do here. // TODO: figure out what is the correct thing to do here.
} else { } else {
@ -272,24 +277,24 @@ public final class DynamicBinaryDictIOUtils {
final boolean hasShortcuts = shortcuts != null && !shortcuts.isEmpty(); final boolean hasShortcuts = shortcuts != null && !shortcuts.isEmpty();
// find the insert position of the word. // find the insert position of the word.
if (buffer.position() != 0) buffer.position(0); if (dictBuffer.position() != 0) dictBuffer.position(0);
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader); final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position(); int wordPos = 0, address = dictBuffer.position(), nodeOriginAddress = dictBuffer.position();
final int[] codePoints = FusionDictionary.getCodePoints(word); final int[] codePoints = FusionDictionary.getCodePoints(word);
final int wordLen = codePoints.length; final int wordLen = codePoints.length;
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) { for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
if (wordPos >= wordLen) break; if (wordPos >= wordLen) break;
nodeOriginAddress = buffer.position(); nodeOriginAddress = dictBuffer.position();
int nodeParentAddress = -1; int nodeParentAddress = -1;
final int charGroupCount = BinaryDictDecoder.readCharGroupCount(buffer); final int charGroupCount = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
boolean foundNextGroup = false; boolean foundNextGroup = false;
for (int i = 0; i < charGroupCount; ++i) { for (int i = 0; i < charGroupCount; ++i) {
address = buffer.position(); address = dictBuffer.position();
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer, final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(dictBuffer,
buffer.position(), fileHeader.mFormatOptions); dictBuffer.position(), fileHeader.mFormatOptions);
final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags,
fileHeader.mFormatOptions); fileHeader.mFormatOptions);
if (isMovedGroup) continue; if (isMovedGroup) continue;
@ -308,18 +313,18 @@ public final class DynamicBinaryDictIOUtils {
* after * after
* abc - d - ef * abc - d - ef
*/ */
final int newNodeAddress = buffer.limit(); final int newNodeAddress = dictBuffer.limit();
final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1, final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1,
isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */, isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */,
false /* isBlackListEntry */, fileHeader.mFormatOptions); false /* isBlackListEntry */, fileHeader.mFormatOptions);
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags, int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags,
frequency, nodeParentAddress, shortcuts, bigrams, destination, frequency, nodeParentAddress, shortcuts, bigrams, destination,
buffer, nodeOriginAddress, address, fileHeader.mFormatOptions); dictBuffer, nodeOriginAddress, address, fileHeader.mFormatOptions);
final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p, final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p,
currentInfo.mCharacters.length); currentInfo.mCharacters.length);
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
updateParentAddresses(buffer, currentInfo.mChildrenAddress, updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress,
newNodeAddress + written + 1, fileHeader.mFormatOptions); newNodeAddress + written + 1, fileHeader.mFormatOptions);
} }
final CharGroupInfo newInfo2 = new CharGroupInfo( final CharGroupInfo newInfo2 = new CharGroupInfo(
@ -344,7 +349,7 @@ public final class DynamicBinaryDictIOUtils {
* - c * - c
*/ */
final int newNodeAddress = buffer.limit(); final int newNodeAddress = dictBuffer.limit();
final int childrenAddress = currentInfo.mChildrenAddress; final int childrenAddress = currentInfo.mChildrenAddress;
// move prefix // move prefix
@ -355,13 +360,13 @@ public final class DynamicBinaryDictIOUtils {
fileHeader.mFormatOptions); fileHeader.mFormatOptions);
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p,
prefixFlags, -1 /* frequency */, nodeParentAddress, null, null, prefixFlags, -1 /* frequency */, nodeParentAddress, null, null,
destination, buffer, nodeOriginAddress, address, destination, dictBuffer, nodeOriginAddress, address,
fileHeader.mFormatOptions); fileHeader.mFormatOptions);
final int[] suffixCharacters = Arrays.copyOfRange( final int[] suffixCharacters = Arrays.copyOfRange(
currentInfo.mCharacters, p, currentInfo.mCharacters.length); currentInfo.mCharacters, p, currentInfo.mCharacters.length);
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) { if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
updateParentAddresses(buffer, currentInfo.mChildrenAddress, updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress,
newNodeAddress + written + 1, fileHeader.mFormatOptions); newNodeAddress + written + 1, fileHeader.mFormatOptions);
} }
final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags( final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags(
@ -403,7 +408,7 @@ public final class DynamicBinaryDictIOUtils {
if (wordPos + currentInfo.mCharacters.length == wordLen) { if (wordPos + currentInfo.mCharacters.length == wordLen) {
// the word exists in the dictionary. // the word exists in the dictionary.
// only update group. // only update group.
final int newNodeAddress = buffer.limit(); final int newNodeAddress = dictBuffer.limit();
final boolean hasMultipleChars = currentInfo.mCharacters.length > 1; final boolean hasMultipleChars = currentInfo.mCharacters.length > 1;
final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars, final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars,
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams, isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
@ -412,7 +417,7 @@ public final class DynamicBinaryDictIOUtils {
-1 /* endAddress */, flags, currentInfo.mCharacters, frequency, -1 /* endAddress */, flags, currentInfo.mCharacters, frequency,
nodeParentAddress, currentInfo.mChildrenAddress, shortcuts, nodeParentAddress, currentInfo.mChildrenAddress, shortcuts,
bigrams); bigrams);
moveCharGroup(destination, buffer, newInfo, nodeOriginAddress, address, moveCharGroup(destination, dictBuffer, newInfo, nodeOriginAddress, address,
fileHeader.mFormatOptions); fileHeader.mFormatOptions);
return; return;
} }
@ -430,8 +435,8 @@ public final class DynamicBinaryDictIOUtils {
* after * after
* ab - cd - e * ab - cd - e
*/ */
final int newNodeAddress = buffer.limit(); final int newNodeAddress = dictBuffer.limit();
updateChildrenAddress(buffer, address, newNodeAddress, updateChildrenAddress(dictBuffer, address, newNodeAddress,
fileHeader.mFormatOptions); fileHeader.mFormatOptions);
final int newGroupAddress = newNodeAddress + 1; final int newGroupAddress = newNodeAddress + 1;
final boolean hasMultipleChars = (wordLen - wordPos) > 1; final boolean hasMultipleChars = (wordLen - wordPos) > 1;
@ -445,7 +450,7 @@ public final class DynamicBinaryDictIOUtils {
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo }); BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo });
return; return;
} }
buffer.position(currentInfo.mChildrenAddress); dictBuffer.position(currentInfo.mChildrenAddress);
foundNextGroup = true; foundNextGroup = true;
break; break;
} }
@ -454,8 +459,8 @@ public final class DynamicBinaryDictIOUtils {
if (foundNextGroup) continue; if (foundNextGroup) continue;
// reached the end of the array. // reached the end of the array.
final int linkAddressPosition = buffer.position(); final int linkAddressPosition = dictBuffer.position();
int nextLink = buffer.readUnsignedInt24(); int nextLink = dictBuffer.readUnsignedInt24();
if ((nextLink & FormatSpec.MSB24) != 0) { if ((nextLink & FormatSpec.MSB24) != 0) {
nextLink = -(nextLink & FormatSpec.SINT24_MAX); nextLink = -(nextLink & FormatSpec.SINT24_MAX);
} }
@ -475,9 +480,9 @@ public final class DynamicBinaryDictIOUtils {
*/ */
// change the forward link address. // change the forward link address.
final int newNodeAddress = buffer.limit(); final int newNodeAddress = dictBuffer.limit();
buffer.position(linkAddressPosition); dictBuffer.position(linkAddressPosition);
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress); BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeAddress);
final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen); final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
final int flags = BinaryDictEncoder.makeCharGroupFlags(characters.length > 1, final int flags = BinaryDictEncoder.makeCharGroupFlags(characters.length > 1,
@ -490,7 +495,7 @@ public final class DynamicBinaryDictIOUtils {
return; return;
} else { } else {
depth--; depth--;
buffer.position(nextLink); dictBuffer.position(nextLink);
} }
} }
} }

View file

@ -24,7 +24,7 @@ import java.util.HashMap;
/** /**
* An interface to read a binary dictionary file header. * An interface to read a binary dictionary file header.
*/ */
public interface HeaderReaderInterface { public interface HeaderReader {
public int readVersion() throws IOException, UnsupportedFormatException; public int readVersion() throws IOException, UnsupportedFormatException;
public int readOptionFlags(); public int readOptionFlags();
public int readHeaderSize(); public int readHeaderSize();

View file

@ -28,7 +28,7 @@ import com.android.inputmethod.latin.ExpandableDictionary;
import com.android.inputmethod.latin.LatinImeLogger; import com.android.inputmethod.latin.LatinImeLogger;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo; import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer; import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.settings.Settings; import com.android.inputmethod.latin.settings.Settings;
import com.android.inputmethod.latin.utils.CollectionUtils; import com.android.inputmethod.latin.utils.CollectionUtils;
@ -241,10 +241,10 @@ public abstract class DynamicPredictionDictionaryBase extends ExpandableDictiona
}; };
// Load the dictionary from binary file // Load the dictionary from binary file
final BinaryDictReader reader = new BinaryDictReader( final BinaryDictDecoder reader = new BinaryDictDecoder(
new File(getContext().getFilesDir(), fileName)); new File(getContext().getFilesDir(), fileName));
try { try {
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); reader.openDictBuffer(new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
UserHistoryDictIOUtils.readDictionaryBinary(reader, listener); UserHistoryDictIOUtils.readDictionaryBinary(reader, listener);
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
// This is an expected condition: we don't have a user history dictionary for this // This is an expected condition: we don't have a user history dictionary for this

View file

@ -16,17 +16,17 @@
package com.android.inputmethod.latin.utils; package com.android.inputmethod.latin.utils;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
/** /**
* This class provides an implementation for the FusionDictionary buffer interface that is backed * This class provides an implementation for the FusionDictionary buffer interface that is backed
* by a simple byte array. It allows creating a binary dictionary in memory. * by a simple byte array. It allows creating a binary dictionary in memory.
*/ */
public final class ByteArrayWrapper implements FusionDictionaryBufferInterface { public final class ByteArrayDictBuffer implements DictBuffer {
private byte[] mBuffer; private byte[] mBuffer;
private int mPosition; private int mPosition;
public ByteArrayWrapper(final byte[] buffer) { public ByteArrayDictBuffer(final byte[] buffer) {
mBuffer = buffer; mBuffer = buffer;
mPosition = 0; mPosition = 0;
} }

View file

@ -22,7 +22,6 @@ import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder; import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.BinaryDictEncoder; import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils; import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
@ -119,13 +118,13 @@ public final class UserHistoryDictIOUtils {
/** /**
* Reads dictionary from file. * Reads dictionary from file.
*/ */
public static void readDictionaryBinary(final BinaryDictReader reader, public static void readDictionaryBinary(final BinaryDictDecoder dictDecoder,
final OnAddWordListener dict) { final OnAddWordListener dict) {
final Map<Integer, String> unigrams = CollectionUtils.newTreeMap(); final Map<Integer, String> unigrams = CollectionUtils.newTreeMap();
final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap(); final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap(); final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
try { try {
BinaryDictIOUtils.readUnigramsAndBigramsBinary(reader, unigrams, frequencies, BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, unigrams, frequencies,
bigrams); bigrams);
} catch (IOException e) { } catch (IOException e) {
Log.e(TAG, "IO exception while reading file", e); Log.e(TAG, "IO exception while reading file", e);
@ -157,7 +156,7 @@ public final class UserHistoryDictIOUtils {
continue; continue;
} }
to.setBigram(word1, word2, to.setBigram(word1, word2,
BinaryDictDecoder.reconstructBigramFrequency(unigramFrequency, BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency,
attr.mFrequency)); attr.mFrequency));
} }
} }

View file

@ -22,7 +22,7 @@ import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log; import android.util.Log;
import android.util.SparseArray; import android.util.SparseArray;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
@ -44,7 +44,7 @@ import java.util.Random;
import java.util.Set; import java.util.Set;
/** /**
* Unit tests for BinaryDictDecoder and BinaryDictEncoder. * Unit tests for BinaryDictDecoderUtils and BinaryDictEncoder.
*/ */
@LargeTest @LargeTest
public class BinaryDictDecoderEncoderTests extends AndroidTestCase { public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
@ -118,14 +118,16 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
// Utilities for test // Utilities for test
/** /**
* Makes new buffer according to BUFFER_TYPE. * Makes new DictBuffer according to BUFFER_TYPE.
*/ */
private void getBuffer(final BinaryDictReader reader, final int bufferType) private void getDictBuffer(final BinaryDictDecoder dictDecoder, final int bufferType)
throws FileNotFoundException, IOException { throws FileNotFoundException, IOException {
if (bufferType == USE_BYTE_BUFFER) { if (bufferType == USE_BYTE_BUFFER) {
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
} else if (bufferType == USE_BYTE_ARRAY) { } else if (bufferType == USE_BYTE_ARRAY) {
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
} }
} }
@ -269,14 +271,14 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap, final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap,
final int bufferType) { final int bufferType) {
long now, diff = -1; long now, diff = -1;
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
FusionDictionary dict = null; FusionDictionary dict = null;
try { try {
getBuffer(reader, bufferType); getDictBuffer(dictDecoder, bufferType);
assertNotNull(reader.getBuffer()); assertNotNull(dictDecoder.getDictBuffer());
now = System.currentTimeMillis(); now = System.currentTimeMillis();
dict = BinaryDictDecoder.readDictionaryBinary(reader, null); dict = BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null);
diff = System.currentTimeMillis() - now; diff = System.currentTimeMillis() - now;
} catch (IOException e) { } catch (IOException e) {
Log.e(TAG, "IOException while reading dictionary", e); Log.e(TAG, "IOException while reading dictionary", e);
@ -388,7 +390,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} }
actBigrams.get(word1).add(word2); actBigrams.get(word1).add(word2);
final int bigramFreq = BinaryDictDecoder.reconstructBigramFrequency( final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
unigramFreq, attr.mFrequency); unigramFreq, attr.mFrequency);
assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ); assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
} }
@ -407,12 +409,12 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap(); final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap();
long now = -1, diff = -1; long now = -1, diff = -1;
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
try { try {
getBuffer(reader, bufferType); getDictBuffer(dictDecoder, bufferType);
assertNotNull("Can't get buffer.", reader.getBuffer()); assertNotNull("Can't get buffer.", dictDecoder.getDictBuffer());
now = System.currentTimeMillis(); now = System.currentTimeMillis();
BinaryDictIOUtils.readUnigramsAndBigramsBinary(reader, resultWords, resultFreqs, BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, resultWords, resultFreqs,
resultBigrams); resultBigrams);
diff = System.currentTimeMillis() - now; diff = System.currentTimeMillis() - now;
} catch (IOException e) { } catch (IOException e) {
@ -497,31 +499,31 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} }
// Tests for getTerminalPosition // Tests for getTerminalPosition
private String getWordFromBinary(final BinaryDictReader dictReader, final int address) { private String getWordFromBinary(final BinaryDictDecoder dictDecoder, final int address) {
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
if (buffer.position() != 0) buffer.position(0); if (dictBuffer.position() != 0) dictBuffer.position(0);
FileHeader fileHeader = null; FileHeader fileHeader = null;
try { try {
fileHeader = BinaryDictDecoder.readHeader(dictReader); fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
} catch (IOException e) { } catch (IOException e) {
return null; return null;
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
return null; return null;
} }
if (fileHeader == null) return null; if (fileHeader == null) return null;
return BinaryDictDecoder.getWordAtAddress(buffer, fileHeader.mHeaderSize, return BinaryDictDecoderUtils.getWordAtAddress(dictBuffer, fileHeader.mHeaderSize,
address - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord; address - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord;
} }
private long runGetTerminalPosition(final BinaryDictReader reader, final String word, int index, private long runGetTerminalPosition(final BinaryDictDecoder dictDecoder, final String word,
boolean contained) { int index, boolean contained) {
final int expectedFrequency = (UNIGRAM_FREQ + index) % 255; final int expectedFrequency = (UNIGRAM_FREQ + index) % 255;
long diff = -1; long diff = -1;
int position = -1; int position = -1;
try { try {
final long now = System.nanoTime(); final long now = System.nanoTime();
position = BinaryDictIOUtils.getTerminalPosition(reader, word); position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
diff = System.nanoTime() - now; diff = System.nanoTime() - now;
} catch (IOException e) { } catch (IOException e) {
Log.e(TAG, "IOException while getTerminalPosition", e); Log.e(TAG, "IOException while getTerminalPosition", e);
@ -530,7 +532,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
} }
assertEquals(FormatSpec.NOT_VALID_WORD != position, contained); assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
if (contained) assertEquals(getWordFromBinary(reader, position), word); if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word);
return diff; return diff;
} }
@ -550,28 +552,29 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
try { try {
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
} catch (IOException e) { } catch (IOException e) {
// ignore // ignore
Log.e(TAG, "IOException while opening the buffer", e); Log.e(TAG, "IOException while opening the buffer", e);
} }
assertNotNull("Can't get the buffer", reader.getBuffer()); assertNotNull("Can't get the buffer", dictDecoder.getDictBuffer());
try { try {
// too long word // too long word
final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz"; final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
assertEquals(FormatSpec.NOT_VALID_WORD, assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, longWord)); BinaryDictIOUtils.getTerminalPosition(dictDecoder, longWord));
// null // null
assertEquals(FormatSpec.NOT_VALID_WORD, assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, null)); BinaryDictIOUtils.getTerminalPosition(dictDecoder, null));
// empty string // empty string
assertEquals(FormatSpec.NOT_VALID_WORD, assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, "")); BinaryDictIOUtils.getTerminalPosition(dictDecoder, ""));
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} }
@ -579,7 +582,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
// Test a word that is contained within the dictionary. // Test a word that is contained within the dictionary.
long sum = 0; long sum = 0;
for (int i = 0; i < sWords.size(); ++i) { for (int i = 0; i < sWords.size(); ++i) {
final long time = runGetTerminalPosition(reader, sWords.get(i), i, true); final long time = runGetTerminalPosition(dictDecoder, sWords.get(i), i, true);
sum += time == -1 ? 0 : time; sum += time == -1 ? 0 : time;
} }
Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000)); Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000));
@ -590,7 +593,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
for (int i = 0; i < 1000; ++i) { for (int i = 0; i < 1000; ++i) {
final String word = generateWord(random, codePointSet); final String word = generateWord(random, codePointSet);
if (sWords.indexOf(word) != -1) continue; if (sWords.indexOf(word) != -1) continue;
runGetTerminalPosition(reader, word, i, false); runGetTerminalPosition(dictDecoder, word, i, false);
} }
} }
@ -610,28 +613,28 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */); addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE); timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
try { try {
reader.openBuffer( dictDecoder.openDictBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
} catch (IOException e) { } catch (IOException e) {
// ignore // ignore
Log.e(TAG, "IOException while opening the buffer", e); Log.e(TAG, "IOException while opening the buffer", e);
} }
assertNotNull("Can't get the buffer", reader.getBuffer()); assertNotNull("Can't get the buffer", dictDecoder.getDictBuffer());
try { try {
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0))); BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(0)));
DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(0)); DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(0));
assertEquals(FormatSpec.NOT_VALID_WORD, assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0))); BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(0)));
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD, MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5))); BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(5)));
DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(5)); DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(5));
assertEquals(FormatSpec.NOT_VALID_WORD, assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5))); BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(5)));
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} }

View file

@ -16,14 +16,14 @@
package com.android.inputmethod.latin.makedict; package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.BinaryDictReader.FusionDictionaryBufferFactory; import com.android.inputmethod.latin.makedict.BinaryDictDecoder.DictionaryBufferFactory;
import com.android.inputmethod.latin.makedict.BinaryDictReader. import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
FusionDictionaryBufferFromByteArrayFactory; DictionaryBufferFromByteArrayFactory;
import com.android.inputmethod.latin.makedict.BinaryDictReader. import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
FusionDictionaryBufferFromByteBufferFactory; DictionaryBufferFromReadOnlyByteBufferFactory;
import com.android.inputmethod.latin.makedict.BinaryDictReader. import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
FusionDictionaryBufferFromWritableByteBufferFactory; DictionaryBufferFromWritableByteBufferFactory;
import android.test.AndroidTestCase; import android.test.AndroidTestCase;
import android.util.Log; import android.util.Log;
@ -33,10 +33,10 @@ import java.io.FileOutputStream;
import java.io.IOException; import java.io.IOException;
/** /**
* Unit tests for BinaryDictReader * Unit tests for BinaryDictDecoder
*/ */
public class BinaryDictReaderTests extends AndroidTestCase { public class BinaryDictDecoderTests extends AndroidTestCase {
private static final String TAG = BinaryDictReaderTests.class.getSimpleName(); private static final String TAG = BinaryDictDecoderTests.class.getSimpleName();
private final byte[] data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 }; private final byte[] data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@ -61,7 +61,7 @@ public class BinaryDictReaderTests extends AndroidTestCase {
@SuppressWarnings("null") @SuppressWarnings("null")
public void runTestOpenBuffer(final String testName, public void runTestOpenBuffer(final String testName,
final FusionDictionaryBufferFactory factory) { final DictionaryBufferFactory factory) {
File testFile = null; File testFile = null;
try { try {
testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir()); testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
@ -70,9 +70,9 @@ public class BinaryDictReaderTests extends AndroidTestCase {
} }
assertNotNull(testFile); assertNotNull(testFile);
final BinaryDictReader reader = new BinaryDictReader(testFile); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(testFile);
try { try {
reader.openBuffer(factory); dictDecoder.openDictBuffer(factory);
} catch (Exception e) { } catch (Exception e) {
Log.e(TAG, "Failed to open the buffer", e); Log.e(TAG, "Failed to open the buffer", e);
} }
@ -80,32 +80,32 @@ public class BinaryDictReaderTests extends AndroidTestCase {
writeDataToFile(testFile); writeDataToFile(testFile);
try { try {
reader.openBuffer(factory); dictDecoder.openDictBuffer(factory);
} catch (Exception e) { } catch (Exception e) {
Log.e(TAG, "Raised the exception while opening buffer", e); Log.e(TAG, "Raised the exception while opening buffer", e);
} }
assertEquals(testFile.length(), reader.getBuffer().capacity()); assertEquals(testFile.length(), dictDecoder.getDictBuffer().capacity());
} }
public void testOpenBufferWithByteBuffer() { public void testOpenBufferWithByteBuffer() {
runTestOpenBuffer("testOpenBufferWithByteBuffer", runTestOpenBuffer("testOpenBufferWithByteBuffer",
new FusionDictionaryBufferFromByteBufferFactory()); new DictionaryBufferFromReadOnlyByteBufferFactory());
} }
public void testOpenBufferWithByteArray() { public void testOpenBufferWithByteArray() {
runTestOpenBuffer("testOpenBufferWithByteArray", runTestOpenBuffer("testOpenBufferWithByteArray",
new FusionDictionaryBufferFromByteArrayFactory()); new DictionaryBufferFromByteArrayFactory());
} }
public void testOpenBufferWithWritableByteBuffer() { public void testOpenBufferWithWritableByteBuffer() {
runTestOpenBuffer("testOpenBufferWithWritableByteBuffer", runTestOpenBuffer("testOpenBufferWithWritableByteBuffer",
new FusionDictionaryBufferFromWritableByteBufferFactory()); new DictionaryBufferFromWritableByteBufferFactory());
} }
@SuppressWarnings("null") @SuppressWarnings("null")
public void runTestGetBuffer(final String testName, public void runTestGetBuffer(final String testName,
final FusionDictionaryBufferFactory factory) { final DictionaryBufferFactory factory) {
File testFile = null; File testFile = null;
try { try {
testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir()); testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
@ -113,40 +113,41 @@ public class BinaryDictReaderTests extends AndroidTestCase {
Log.e(TAG, "IOException while the creating temporary file", e); Log.e(TAG, "IOException while the creating temporary file", e);
} }
final BinaryDictReader reader = new BinaryDictReader(testFile); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(testFile);
// the default return value of getBuffer() must be null. // the default return value of getBuffer() must be null.
assertNull("the default return value of getBuffer() is not null", reader.getBuffer()); assertNull("the default return value of getBuffer() is not null",
dictDecoder.getDictBuffer());
writeDataToFile(testFile); writeDataToFile(testFile);
assertTrue(testFile.exists()); assertTrue(testFile.exists());
Log.d(TAG, "file length = " + testFile.length()); Log.d(TAG, "file length = " + testFile.length());
FusionDictionaryBufferInterface buffer = null; DictBuffer dictBuffer = null;
try { try {
buffer = reader.openAndGetBuffer(factory); dictBuffer = dictDecoder.openAndGetDictBuffer(factory);
} catch (IOException e) { } catch (IOException e) {
Log.e(TAG, "Failed to open and get the buffer", e); Log.e(TAG, "Failed to open and get the buffer", e);
} }
assertNotNull("the buffer must not be null", buffer); assertNotNull("the buffer must not be null", dictBuffer);
for (int i = 0; i < data.length; ++i) { for (int i = 0; i < data.length; ++i) {
assertEquals(data[i], buffer.readUnsignedByte()); assertEquals(data[i], dictBuffer.readUnsignedByte());
} }
} }
public void testGetBufferWithByteBuffer() { public void testGetBufferWithByteBuffer() {
runTestGetBuffer("testGetBufferWithByteBuffer", runTestGetBuffer("testGetBufferWithByteBuffer",
new FusionDictionaryBufferFromByteBufferFactory()); new DictionaryBufferFromReadOnlyByteBufferFactory());
} }
public void testGetBufferWithByteArray() { public void testGetBufferWithByteArray() {
runTestGetBuffer("testGetBufferWithByteArray", runTestGetBuffer("testGetBufferWithByteArray",
new FusionDictionaryBufferFromByteArrayFactory()); new DictionaryBufferFromByteArrayFactory());
} }
public void testGetBufferWithWritableByteBuffer() { public void testGetBufferWithWritableByteBuffer() {
runTestGetBuffer("testGetBufferWithWritableByteBuffer", runTestGetBuffer("testGetBufferWithWritableByteBuffer",
new FusionDictionaryBufferFromWritableByteBufferFactory()); new DictionaryBufferFromWritableByteBufferFactory());
} }
} }

View file

@ -21,9 +21,9 @@ import android.test.MoreAsserts;
import android.test.suitebuilder.annotation.LargeTest; import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log; import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.BinaryDictReader. import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
FusionDictionaryBufferFromWritableByteBufferFactory; DictionaryBufferFromWritableByteBufferFactory;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader; import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray; import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString; import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
@ -112,26 +112,26 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
Log.d(TAG, " end address = " + info.mEndAddress); Log.d(TAG, " end address = " + info.mEndAddress);
} }
private static void printNode(final FusionDictionaryBufferInterface buffer, private static void printNode(final DictBuffer dictBuffer,
final FormatSpec.FormatOptions formatOptions) { final FormatSpec.FormatOptions formatOptions) {
Log.d(TAG, "Node at " + buffer.position()); Log.d(TAG, "Node at " + dictBuffer.position());
final int count = BinaryDictDecoder.readCharGroupCount(buffer); final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
Log.d(TAG, " charGroupCount = " + count); Log.d(TAG, " charGroupCount = " + count);
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer, final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(dictBuffer,
buffer.position(), formatOptions); dictBuffer.position(), formatOptions);
printCharGroup(currentInfo); printCharGroup(currentInfo);
} }
if (formatOptions.mSupportsDynamicUpdate) { if (formatOptions.mSupportsDynamicUpdate) {
final int forwardLinkAddress = buffer.readUnsignedInt24(); final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress); Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
} }
} }
private static void printBinaryFile(final BinaryDictReader dictReader) private static void printBinaryFile(final BinaryDictDecoder dictDecoder)
throws IOException, UnsupportedFormatException { throws IOException, UnsupportedFormatException {
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader); final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer(); final DictBuffer buffer = dictDecoder.getDictBuffer();
while (buffer.position() < buffer.limit()) { while (buffer.position() < buffer.limit()) {
printNode(buffer, fileHeader.mFormatOptions); printNode(buffer, fileHeader.mFormatOptions);
} }
@ -139,13 +139,13 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
private int getWordPosition(final File file, final String word) { private int getWordPosition(final File file, final String word) {
int position = FormatSpec.NOT_VALID_WORD; int position = FormatSpec.NOT_VALID_WORD;
final BinaryDictReader dictReader = new BinaryDictReader(file); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
FileInputStream inStream = null; FileInputStream inStream = null;
try { try {
inStream = new FileInputStream(file); inStream = new FileInputStream(file);
dictReader.openBuffer( dictDecoder.openDictBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
position = BinaryDictIOUtils.getTerminalPosition(dictReader, word); position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} finally { } finally {
@ -161,12 +161,12 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
} }
private CharGroupInfo findWordFromFile(final File file, final String word) { private CharGroupInfo findWordFromFile(final File file, final String word) {
final BinaryDictReader dictReader = new BinaryDictReader(file); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
CharGroupInfo info = null; CharGroupInfo info = null;
try { try {
dictReader.openBuffer( dictDecoder.openDictBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
info = BinaryDictIOUtils.findWordByBinaryDictReader(dictReader, word); info = BinaryDictIOUtils.findWordByBinaryDictReader(dictDecoder, word);
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} }
@ -177,18 +177,18 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
private long insertAndCheckWord(final File file, final String word, final int frequency, private long insertAndCheckWord(final File file, final String word, final int frequency,
final boolean exist, final ArrayList<WeightedString> bigrams, final boolean exist, final ArrayList<WeightedString> bigrams,
final ArrayList<WeightedString> shortcuts) { final ArrayList<WeightedString> shortcuts) {
final BinaryDictReader dictReader = new BinaryDictReader(file); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
BufferedOutputStream outStream = null; BufferedOutputStream outStream = null;
long amountOfTime = -1; long amountOfTime = -1;
try { try {
dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory()); dictDecoder.openDictBuffer(new DictionaryBufferFromWritableByteBufferFactory());
outStream = new BufferedOutputStream(new FileOutputStream(file, true)); outStream = new BufferedOutputStream(new FileOutputStream(file, true));
if (!exist) { if (!exist) {
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word)); assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
} }
final long now = System.nanoTime(); final long now = System.nanoTime();
DynamicBinaryDictIOUtils.insertWord(dictReader, outStream, word, frequency, bigrams, DynamicBinaryDictIOUtils.insertWord(dictDecoder, outStream, word, frequency, bigrams,
shortcuts, false, false); shortcuts, false, false);
amountOfTime = System.nanoTime() - now; amountOfTime = System.nanoTime() - now;
outStream.flush(); outStream.flush();
@ -211,23 +211,23 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
} }
private void deleteWord(final File file, final String word) { private void deleteWord(final File file, final String word) {
final BinaryDictReader dictReader = new BinaryDictReader(file); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
try { try {
dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory()); dictDecoder.openDictBuffer(new DictionaryBufferFromWritableByteBufferFactory());
DynamicBinaryDictIOUtils.deleteWord(dictReader, word); DynamicBinaryDictIOUtils.deleteWord(dictDecoder, word);
} catch (IOException e) { } catch (IOException e) {
} catch (UnsupportedFormatException e) { } catch (UnsupportedFormatException e) {
} }
} }
private void checkReverseLookup(final File file, final String word, final int position) { private void checkReverseLookup(final File file, final String word, final int position) {
final BinaryDictReader dictReader = new BinaryDictReader(file); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
try { try {
final FusionDictionaryBufferInterface buffer = dictReader.openAndGetBuffer( final DictBuffer dictBuffer = dictDecoder.openAndGetDictBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader); final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
assertEquals(word, assertEquals(word,
BinaryDictDecoder.getWordAtAddress(dictReader.getBuffer(), BinaryDictDecoderUtils.getWordAtAddress(dictDecoder.getDictBuffer(),
fileHeader.mHeaderSize, position - fileHeader.mHeaderSize, fileHeader.mHeaderSize, position - fileHeader.mHeaderSize,
fileHeader.mFormatOptions).mWord); fileHeader.mFormatOptions).mWord);
} catch (IOException e) { } catch (IOException e) {

View file

@ -21,7 +21,7 @@ import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.LargeTest; import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log; import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup; import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
@ -147,15 +147,16 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
} }
private void readDictFromFile(final File file, final OnAddWordListener listener) { private void readDictFromFile(final File file, final OnAddWordListener listener) {
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
try { try {
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
} catch (FileNotFoundException e) { } catch (FileNotFoundException e) {
Log.e(TAG, "file not found", e); Log.e(TAG, "file not found", e);
} catch (IOException e) { } catch (IOException e) {
Log.e(TAG, "IOException", e); Log.e(TAG, "IOException", e);
} }
UserHistoryDictIOUtils.readDictionaryBinary(reader, listener); UserHistoryDictIOUtils.readDictionaryBinary(dictDecoder, listener);
} }
public void testGenerateFusionDictionary() { public void testGenerateFusionDictionary() {

View file

@ -28,7 +28,7 @@ LATINIME_ANNOTATIONS_SOURCE_DIRECTORY := $(LATINIME_BASE_SOURCE_DIRECTORY)/annot
LATINIME_CORE_SOURCE_DIRECTORY := $(LATINIME_BASE_SOURCE_DIRECTORY)/latin LATINIME_CORE_SOURCE_DIRECTORY := $(LATINIME_BASE_SOURCE_DIRECTORY)/latin
MAKEDICT_CORE_SOURCE_DIRECTORY := $(LATINIME_CORE_SOURCE_DIRECTORY)/makedict MAKEDICT_CORE_SOURCE_DIRECTORY := $(LATINIME_CORE_SOURCE_DIRECTORY)/makedict
USED_TARGETTED_UTILS := \ USED_TARGETTED_UTILS := \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayWrapper.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \ $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java $(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java

View file

@ -16,8 +16,8 @@
package com.android.inputmethod.latin.dicttool; package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder; import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException; import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
@ -97,7 +97,7 @@ public final class BinaryDictOffdeviceUtils {
// over and over, ending in a stack overflow. Hence we limit the depth at which we try // over and over, ending in a stack overflow. Hence we limit the depth at which we try
// decoding the file. // decoding the file.
if (depth > MAX_DECODE_DEPTH) return null; if (depth > MAX_DECODE_DEPTH) return null;
if (BinaryDictDecoder.isBinaryDictionary(src)) { if (BinaryDictDecoderUtils.isBinaryDictionary(src)) {
spec.mFile = src; spec.mFile = src;
return spec; return spec;
} }
@ -184,15 +184,15 @@ public final class BinaryDictOffdeviceUtils {
crash(filename, new RuntimeException( crash(filename, new RuntimeException(
filename + " does not seem to be a dictionary file")); filename + " does not seem to be a dictionary file"));
} else { } else {
final BinaryDictReader reader = new BinaryDictReader(decodedSpec.mFile); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(decodedSpec.mFile);
reader.openBuffer( dictDecoder.openDictBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory()); new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
if (report) { if (report) {
System.out.println("Format : Binary dictionary format"); System.out.println("Format : Binary dictionary format");
System.out.println("Packaging : " + decodedSpec.describeChain()); System.out.println("Packaging : " + decodedSpec.describeChain());
System.out.println("Uncompressed size : " + decodedSpec.mFile.length()); System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
} }
return BinaryDictDecoder.readDictionaryBinary(reader, null); return BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null);
} }
} }
} catch (IOException e) { } catch (IOException e) {

View file

@ -16,9 +16,9 @@
package com.android.inputmethod.latin.dicttool; package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder; import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
import com.android.inputmethod.latin.makedict.BinaryDictEncoder; import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
import com.android.inputmethod.latin.makedict.BinaryDictReader; import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.FormatSpec; import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.MakedictLog; import com.android.inputmethod.latin.makedict.MakedictLog;
@ -176,7 +176,7 @@ public class DictionaryMaker {
inputUnigramXml = filename; inputUnigramXml = filename;
} else if (CombinedInputOutput.isCombinedDictionary(filename)) { } else if (CombinedInputOutput.isCombinedDictionary(filename)) {
inputCombined = filename; inputCombined = filename;
} else if (BinaryDictDecoder.isBinaryDictionary(filename)) { } else if (BinaryDictDecoderUtils.isBinaryDictionary(filename)) {
inputBinary = filename; inputBinary = filename;
} else { } else {
throw new IllegalArgumentException( throw new IllegalArgumentException(
@ -198,7 +198,7 @@ public class DictionaryMaker {
} }
} else { } else {
if (null == inputBinary && null == inputUnigramXml) { if (null == inputBinary && null == inputUnigramXml) {
if (BinaryDictDecoder.isBinaryDictionary(arg)) { if (BinaryDictDecoderUtils.isBinaryDictionary(arg)) {
inputBinary = arg; inputBinary = arg;
} else if (CombinedInputOutput.isCombinedDictionary(arg)) { } else if (CombinedInputOutput.isCombinedDictionary(arg)) {
inputCombined = arg; inputCombined = arg;
@ -266,9 +266,10 @@ public class DictionaryMaker {
private static FusionDictionary readBinaryFile(final String binaryFilename) private static FusionDictionary readBinaryFile(final String binaryFilename)
throws FileNotFoundException, IOException, UnsupportedFormatException { throws FileNotFoundException, IOException, UnsupportedFormatException {
final File file = new File(binaryFilename); final File file = new File(binaryFilename);
final BinaryDictReader reader = new BinaryDictReader(file); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); dictDecoder.openDictBuffer(
return BinaryDictDecoder.readDictionaryBinary(reader, null); new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
return BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null);
} }
/** /**

View file

@ -17,8 +17,8 @@
package com.android.inputmethod.latin.dicttool; package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder; import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
import com.android.inputmethod.latin.makedict.BinaryDictEncoder; import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions; import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary; import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions; import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
@ -67,9 +67,10 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step); assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step);
} }
assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size()); assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size());
final BinaryDictReader reader = new BinaryDictReader(decodeSpec.mFile); final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(decodeSpec.mFile);
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory()); dictDecoder.openDictBuffer(
final FusionDictionary resultDict = BinaryDictDecoder.readDictionaryBinary(reader, new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
final FusionDictionary resultDict = BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder,
null /* dict : an optional dictionary to add words to, or null */); null /* dict : an optional dictionary to add words to, or null */);
assertEquals("Dictionary can't be read back correctly", assertEquals("Dictionary can't be read back correctly",
FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(), FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(),