am 77bce05e: [Refactor] Rename BinaryDictReader and BinaryDictDecoder.

* commit '77bce05e6f6e3a988253f9305ae22e51f56f5b1a':
  [Refactor] Rename BinaryDictReader and BinaryDictDecoder.
This commit is contained in:
Yuichiro Hanada 2013-08-19 03:51:22 -07:00 committed by Android Git Automerger
commit 4893fe5719
19 changed files with 1231 additions and 1202 deletions

View file

@ -21,7 +21,7 @@ import android.content.SharedPreferences;
import android.content.res.AssetFileDescriptor;
import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.utils.CollectionUtils;
import com.android.inputmethod.latin.utils.DictionaryInfoUtils;
@ -231,17 +231,17 @@ final public class BinaryDictionaryGetter {
try {
// Read the version of the file
inStream = new FileInputStream(f);
final BinaryDictDecoder.ByteBufferWrapper buffer =
new BinaryDictDecoder.ByteBufferWrapper(inStream.getChannel().map(
final BinaryDictDecoderUtils.ByteBufferDictBuffer dictBuffer =
new BinaryDictDecoderUtils.ByteBufferDictBuffer(inStream.getChannel().map(
FileChannel.MapMode.READ_ONLY, 0, f.length()));
final int magic = buffer.readInt();
final int magic = dictBuffer.readInt();
if (magic != FormatSpec.MAGIC_NUMBER) {
return false;
}
final int formatVersion = buffer.readInt();
final int headerSize = buffer.readInt();
final int formatVersion = dictBuffer.readInt();
final int headerSize = dictBuffer.readInt();
final HashMap<String, String> options = CollectionUtils.newHashMap();
BinaryDictDecoder.populateOptions(buffer, headerSize, options);
BinaryDictDecoderUtils.populateOptions(dictBuffer, headerSize, options);
final String version = options.get(VERSION_KEY);
if (null == version) {

View file

@ -17,35 +17,23 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.decoder.HeaderReader;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
import com.android.inputmethod.latin.utils.JniUtils;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
/**
* Decodes binary files for a FusionDictionary.
*
* All the methods in this class are static.
*/
public final class BinaryDictDecoder {
private static final boolean DBG = MakedictLog.DBG;
@UsedForTesting
public class BinaryDictDecoder implements HeaderReader {
static {
JniUtils.loadNativeLibrary();
@ -54,742 +42,148 @@ public final class BinaryDictDecoder {
// TODO: implement something sensical instead of just a phony method
private static native int doNothing();
private BinaryDictDecoder() {
// This utility class is not publicly instantiable.
}
private static final int MAX_JUMPS = 12;
/**
 * Abstraction over the buffer a binary dictionary is decoded from.
 *
 * Reads are relative to the current position; {@link #put(byte)} is the only write operation.
 */
@UsedForTesting
public interface FusionDictionaryBufferInterface {
    /** Reads the next byte as an unsigned value. */
    int readUnsignedByte();

    /** Reads the next two bytes as an unsigned value. */
    int readUnsignedShort();

    /** Reads the next three bytes as an unsigned value. */
    int readUnsignedInt24();

    /** Reads the next int. */
    int readInt();

    /** Returns the current position in the buffer. */
    int position();

    /** Moves the current position to {@code newPosition}. */
    void position(int newPosition);

    /** Writes {@code b} into the buffer. */
    void put(byte b);

    /** Returns the limit of the buffer. */
    int limit();

    /** Returns the capacity of the buffer. */
    @UsedForTesting
    int capacity();
}
public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface {
private ByteBuffer mBuffer;
public ByteBufferWrapper(final ByteBuffer buffer) {
mBuffer = buffer;
}
@Override
public int readUnsignedByte() {
return mBuffer.get() & 0xFF;
}
@Override
public int readUnsignedShort() {
return mBuffer.getShort() & 0xFFFF;
}
@Override
public int readUnsignedInt24() {
final int retval = readUnsignedByte();
return (retval << 16) + readUnsignedShort();
}
@Override
public int readInt() {
return mBuffer.getInt();
}
@Override
public int position() {
return mBuffer.position();
}
@Override
public void position(int newPos) {
mBuffer.position(newPos);
}
@Override
public void put(final byte b) {
mBuffer.put(b);
}
@Override
public int limit() {
return mBuffer.limit();
}
@Override
public int capacity() {
return mBuffer.capacity();
}
public interface DictionaryBufferFactory {
public DictBuffer getDictionaryBuffer(final File file)
throws FileNotFoundException, IOException;
}
/**
* A class grouping utility function for our specific character encoding.
*/
static final class CharEncoding {
    private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
    private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF;

    /**
     * Tells whether the given character code can be stored in a single byte.
     */
    private static boolean fitsOnOneByte(final int character) {
        return MINIMAL_ONE_BYTE_CHARACTER_VALUE <= character
                && MAXIMAL_ONE_BYTE_CHARACTER_VALUE >= character;
    }

    /**
     * Returns the number of bytes a character occupies in encoded form.
     *
     * Char format is:
     * 1 byte = bbbbbbbb match
     * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
     * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
     *       unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
     *       00011111 would be outside unicode.
     * else: iso-latin-1 code
     * This allows for the whole unicode range to be encoded, including chars outside of
     * the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
     * characters which should never happen anyway (and still work, but take 3 bytes).
     *
     * @param character the character code.
     * @return the size in binary encoded-form, either 1 or 3 bytes.
     */
    static int getCharSize(final int character) {
        // See char encoding in FusionDictionary.java
        final boolean singleByte =
                fitsOnOneByte(character) || FormatSpec.INVALID_CHARACTER == character;
        return singleByte ? 1 : 3;
    }

    /**
     * Returns the total encoded size, in bytes, of all the characters in the array.
     */
    static int getCharArraySize(final int[] chars) {
        int total = 0;
        for (int i = 0; i < chars.length; ++i) {
            total += getCharSize(chars[i]);
        }
        return total;
    }

    /**
     * Encodes one code point into the array, on one or three bytes depending on its size.
     *
     * @param buffer the byte array to write to.
     * @param index the index of the first byte to write.
     * @param codePoint the code point to encode.
     * @return the index right after the last byte written.
     */
    private static int putCodePoint(final byte[] buffer, final int index, final int codePoint) {
        int cursor = index;
        if (1 == getCharSize(codePoint)) {
            buffer[cursor++] = (byte)codePoint;
        } else {
            // Three bytes, most significant first.
            buffer[cursor++] = (byte)(0xFF & (codePoint >> 16));
            buffer[cursor++] = (byte)(0xFF & (codePoint >> 8));
            buffer[cursor++] = (byte)(0xFF & codePoint);
        }
        return cursor;
    }

    /**
     * Writes a char array to a byte buffer. No terminator is written.
     *
     * @param codePoints the code point array to write.
     * @param buffer the byte buffer to write to.
     * @param index the index in buffer to write the character array to.
     * @return the index after the last character.
     */
    static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) {
        for (int i = 0; i < codePoints.length; ++i) {
            index = putCodePoint(buffer, index, codePoints[i]);
        }
        return index;
    }

    /**
     * Writes a string with our character format to a byte buffer.
     *
     * This will also write the terminator byte.
     *
     * @param buffer the byte buffer to write to.
     * @param origin the offset to write from.
     * @param word the string to write.
     * @return the size written, in bytes.
     */
    static int writeString(final byte[] buffer, final int origin,
            final String word) {
        final int length = word.length();
        int cursor = origin;
        int i = 0;
        while (i < length) {
            cursor = putCodePoint(buffer, cursor, word.codePointAt(i));
            // Step by code point, not by char, so surrogate pairs are consumed whole.
            i = word.offsetByCodePoints(i, 1);
        }
        buffer[cursor++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR;
        return cursor - origin;
    }

    /**
     * Writes a string with our character format to a ByteArrayOutputStream.
     *
     * This will also write the terminator byte.
     *
     * @param buffer the ByteArrayOutputStream to write to.
     * @param word the string to write.
     */
    static void writeString(final ByteArrayOutputStream buffer, final String word) {
        final int length = word.length();
        int i = 0;
        while (i < length) {
            final int codePoint = word.codePointAt(i);
            if (1 != getCharSize(codePoint)) {
                buffer.write((byte) (0xFF & (codePoint >> 16)));
                buffer.write((byte) (0xFF & (codePoint >> 8)));
                buffer.write((byte) (0xFF & codePoint));
            } else {
                buffer.write((byte) codePoint);
            }
            i = word.offsetByCodePoints(i, 1);
        }
        buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR);
    }

    /**
     * Reads a terminated string from a buffer. This is the converse of the writeString methods.
     */
    static String readString(final FusionDictionaryBufferInterface buffer) {
        final StringBuilder decoded = new StringBuilder();
        for (int c = readChar(buffer); FormatSpec.INVALID_CHARACTER != c;
                c = readChar(buffer)) {
            decoded.appendCodePoint(c);
        }
        return decoded.toString();
    }

    /**
     * Reads a character from the buffer.
     *
     * This follows the character format documented on getCharSize.
     *
     * @param buffer the buffer, positioned over an encoded character.
     * @return the character code, or FormatSpec.INVALID_CHARACTER when the terminator is read.
     */
    static int readChar(final FusionDictionaryBufferInterface buffer) {
        final int firstByte = buffer.readUnsignedByte();
        if (fitsOnOneByte(firstByte)) {
            return firstByte;
        }
        if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == firstByte) {
            return FormatSpec.INVALID_CHARACTER;
        }
        // Three-byte form: the first byte holds the high bits, the next two the low 16 bits.
        return (firstByte << 16) + buffer.readUnsignedShort();
    }
}
// Input methods: Read a binary dictionary to memory.
// readDictionaryBinary is the public entry point for them.
/**
 * Reads the children address field of a char group.
 *
 * When the format supports dynamic updates the field is always three bytes: 0 means there
 * are no children, and a value with the MSB24 bit set is returned negated (low bits only).
 * Otherwise the width of the field is selected by the address-type bits of the group flags.
 *
 * @param buffer the buffer, positioned on the children address field.
 * @param optionFlags the flags of the char group being read.
 * @param options file format options.
 * @return the address as stored in the file, or FormatSpec.NO_CHILDREN_ADDRESS.
 */
static int readChildrenAddress(final FusionDictionaryBufferInterface buffer,
        final int optionFlags, final FormatOptions options) {
    if (options.mSupportsDynamicUpdate) {
        final int address = buffer.readUnsignedInt24();
        if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
        if ((address & FormatSpec.MSB24) != 0) {
            return -(address & FormatSpec.SINT24_MAX);
        } else {
            return address;
        }
    }
    switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) {
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
            return buffer.readUnsignedByte();
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
            return buffer.readUnsignedShort();
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
            return buffer.readUnsignedInt24();
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
        default:
            // Nothing is read from the buffer in this case.
            return FormatSpec.NO_CHILDREN_ADDRESS;
    }
}
/**
 * Reads the parent address field of a char group, if the format has one.
 *
 * @param buffer the buffer, positioned on the parent address field.
 * @param formatOptions file format options.
 * @return the signed parent address, or FormatSpec.NO_PARENT_ADDRESS when the format does
 * not support dynamic updates (nothing is read from the buffer in that case).
 */
static int readParentAddress(final FusionDictionaryBufferInterface buffer,
        final FormatOptions formatOptions) {
    if (!BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
        return FormatSpec.NO_PARENT_ADDRESS;
    }
    final int rawAddress = buffer.readUnsignedInt24();
    final int magnitude = rawAddress & FormatSpec.SINT24_MAX;
    // The MSB24 bit carries the sign.
    return 0 != (rawAddress & FormatSpec.MSB24) ? -magnitude : magnitude;
}
private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH];
/**
 * Reads a single char group from the buffer and returns its decoded description.
 *
 * The buffer is read sequentially from its current position. originalGroupAddress is only
 * used for address arithmetic: it is the starting value of the running address from which
 * the end address, the children address and the bigram addresses are computed.
 *
 * NOTE(review): characters are staged in the static CHARACTER_BUFFER array, so concurrent
 * calls would presumably interfere with each other - confirm how callers use this.
 *
 * @param buffer the buffer to read from.
 * @param originalGroupAddress the address of the group, recorded as-is in the result.
 * @param options file format options.
 * @return a CharGroupInfo holding the decoded fields and the address right after the group.
 */
public static CharGroupInfo readCharGroup(final FusionDictionaryBufferInterface buffer,
final int originalGroupAddress, final FormatOptions options) {
// addressPointer follows the bytes consumed from the buffer, starting at the group address.
int addressPointer = originalGroupAddress;
final int flags = buffer.readUnsignedByte();
++addressPointer;
final int parentAddress = readParentAddress(buffer, options);
// readParentAddress consumes a 3-byte field only when dynamic updates are supported.
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
addressPointer += 3;
}
final int characters[];
if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
int index = 0;
int character = CharEncoding.readChar(buffer);
addressPointer += CharEncoding.getCharSize(character);
// NOTE(review): -1 is presumably FormatSpec.INVALID_CHARACTER, which readChar returns
// when it meets the terminator - confirm and consider using the constant.
while (-1 != character) {
// FusionDictionary is making sure that the length of the word is smaller than
// MAX_WORD_LENGTH.
// So we'll never write past the end of CHARACTER_BUFFER.
CHARACTER_BUFFER[index++] = character;
character = CharEncoding.readChar(buffer);
addressPointer += CharEncoding.getCharSize(character);
}
characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index);
} else {
// Single-character group: one character, no terminator.
final int character = CharEncoding.readChar(buffer);
addressPointer += CharEncoding.getCharSize(character);
characters = new int[] { character };
}
final int frequency;
if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
++addressPointer;
frequency = buffer.readUnsignedByte();
} else {
frequency = CharGroup.NOT_A_TERMINAL;
}
int childrenAddress = readChildrenAddress(buffer, flags, options);
// The stored value is relative to the position of the children address field itself,
// which is where addressPointer stands before the field size is added below.
if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
childrenAddress += addressPointer;
}
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
ArrayList<WeightedString> shortcutTargets = null;
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
final int pointerBefore = buffer.position();
shortcutTargets = new ArrayList<WeightedString>();
buffer.readUnsignedShort(); // Skip the size
while (true) {
final int targetFlags = buffer.readUnsignedByte();
final String word = CharEncoding.readString(buffer);
shortcutTargets.add(new WeightedString(word,
targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY));
if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
}
// Advance by the number of bytes the shortcut list actually consumed.
addressPointer += buffer.position() - pointerBefore;
}
ArrayList<PendingAttribute> bigrams = null;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>();
int bigramCount = 0;
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
final int bigramFlags = buffer.readUnsignedByte();
++addressPointer;
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE)
? 1 : -1;
// Bigram offsets are relative to the position right after the bigram flags byte.
int bigramAddress = addressPointer;
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
bigramAddress += sign * buffer.readUnsignedByte();
addressPointer += 1;
break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
bigramAddress += sign * buffer.readUnsignedShort();
addressPointer += 2;
break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
final int offset = (buffer.readUnsignedByte() << 16)
+ buffer.readUnsignedShort();
bigramAddress += sign * offset;
addressPointer += 3;
break;
default:
throw new RuntimeException("Has bigrams with no address");
}
bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY,
bigramAddress));
if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
}
// NOTE(review): bigramCount also equals MAX_BIGRAMS_IN_A_GROUP when exactly that many
// bigrams were read and the list ended normally, so this message is logged then too.
if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
MakedictLog.d("too many bigrams in a group.");
}
}
return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
parentAddress, childrenAddress, shortcutTargets, bigrams);
}
/**
* Reads and returns the char group count out of a buffer and forwards the pointer.
*/
public static int readCharGroupCount(final FusionDictionaryBufferInterface buffer) {
    final int firstByte = buffer.readUnsignedByte();
    if (firstByte > FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT) {
        // Two-byte count: the masked first byte supplies the high bits, the next byte the low.
        final int highBits = FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & firstByte;
        return (highBits << 8) + buffer.readUnsignedByte();
    }
    // The count fits on the single byte already read.
    return firstByte;
}
/**
* Finds, as a string, the word at the address passed as an argument.
* Creates DictionaryBuffer using a ByteBuffer
*
* @param buffer the buffer to read from.
* @param headerSize the size of the header.
* @param address the address to seek.
* @param formatOptions file format options.
* @return the word with its frequency, as a weighted string.
* This class uses less memory than DictionaryBufferFromByteArrayFactory,
* but doesn't perform as fast.
* When operating on a big dictionary, this class is preferred.
*/
/* package for tests */ static WeightedString getWordAtAddress(
        final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
        final FormatOptions formatOptions) {
    // Remember where the caller was so that the position can be put back after the lookup.
    final int savedPosition = buffer.position();
    buffer.position(address);
    // Formats with parent addresses can be walked upwards; others need a top-down search.
    final WeightedString word = BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)
            ? getWordAtAddressWithParentAddress(buffer, headerSize, address, formatOptions)
            : getWordAtAddressWithoutParentAddress(buffer, headerSize, address,
                    formatOptions);
    buffer.position(savedPosition);
    return word;
}
/**
 * Rebuilds the word ending at the given group by following parent addresses upwards.
 *
 * Starting from the group at address, the characters of each group are prepended to the
 * result and the walk moves to the parent, until a group reports
 * FormatSpec.NO_PARENT_ADDRESS or FormatSpec.MAX_WORD_LENGTH groups have been visited.
 *
 * @param buffer the buffer to read from; its position is changed by this method.
 * @param headerSize the size of the header, added to addresses to get buffer positions.
 * @param address the address of the last group of the word.
 * @param options file format options.
 * @return the word, with the frequency of the first group that was read.
 */
@SuppressWarnings("unused")
private static WeightedString getWordAtAddressWithParentAddress(
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
final FormatOptions options) {
int currentAddress = address;
// Integer.MIN_VALUE marks "frequency not captured yet".
int frequency = Integer.MIN_VALUE;
final StringBuilder builder = new StringBuilder();
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
CharGroupInfo currentInfo;
int loopCounter = 0;
// Follow moved groups until a group that has not been moved is read. For a moved group
// the next address is computed from its parent address field.
do {
buffer.position(currentAddress + headerSize);
currentInfo = readCharGroup(buffer, currentAddress, options);
if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) {
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
}
// NOTE(review): the jump limit is only logged, and only when DBG is set; nothing here
// bounds the number of iterations of this inner loop.
if (DBG && loopCounter++ > MAX_JUMPS) {
MakedictLog.d("Too many jumps - probably a bug");
}
} while (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options));
if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency;
// Groups are visited leaf first, so each one goes in front of what was gathered so far.
builder.insert(0,
new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length));
if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
// The parent address is relative to the address of the group it was read from.
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
}
return new WeightedString(builder.toString(), frequency);
}
/**
 * Finds the word ending at the given group by searching down from the root node array.
 *
 * Groups are scanned in order. The scan descends into the children of the last group seen
 * whose children address does not exceed the target address, appending that group's
 * characters to the word, until a group whose address equals the target is read.
 *
 * @param buffer the buffer to read from; its position is changed by this method.
 * @param headerSize the size of the header, added to addresses to get buffer positions.
 * @param address the address of the last group of the word.
 * @param options file format options.
 * @return the word with its frequency, or null if no group with this address was reached.
 */
private static WeightedString getWordAtAddressWithoutParentAddress(
        final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
        final FormatOptions options) {
    buffer.position(headerSize);
    final int count = readCharGroupCount(buffer);
    int groupOffset = BinaryDictIOUtils.getGroupCountSize(count);
    final StringBuilder builder = new StringBuilder();
    WeightedString result = null;

    // The last group read in the current array that has children starting at or before the
    // target address; null when no such group has been seen yet in this array.
    CharGroupInfo last = null;
    for (int i = count - 1; i >= 0; --i) {
        CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
        groupOffset = info.mEndAddress;
        if (info.mOriginalAddress == address) {
            builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
            result = new WeightedString(builder.toString(), info.mFrequency);
            break; // and return
        }
        if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
            if (info.mChildrenAddress > address) {
                // This group's children start past the target: descend into the previous
                // candidate instead, if there is one.
                if (null == last) continue;
                builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
                buffer.position(last.mChildrenAddress + headerSize);
                // Restart the scan on the child array; the loop's --i then leaves exactly
                // one iteration per group of that array.
                i = readCharGroupCount(buffer);
                groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
                last = null;
                continue;
            }
            last = info;
        }
        // End of the array: descend into the last candidate. Without a candidate there is
        // nowhere left to look, so the loop ends and null is returned instead of
        // dereferencing a null reference.
        if (0 == i && null != last
                && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
            builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
            buffer.position(last.mChildrenAddress + headerSize);
            i = readCharGroupCount(buffer);
            groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
            last = null;
            continue;
        }
    }
    return result;
}
/**
* Reads a single node array from a buffer.
*
* This method reads the file at the current position. A node array is fully expected to start
* at the current position.
* This will recursively read other node arrays into the structure, populating the reverse
* maps on the fly and using them to keep track of already read nodes.
*
* @param buffer the buffer, correctly positioned at the start of a node array.
* @param headerSize the size, in bytes, of the file header.
* @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
* @param reverseGroupMap a mapping from addresses to already read character groups.
* @param options file format options.
* @return the read node array with all its children already read.
*/
private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer,
final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
throws IOException {
// reverseGroupMap is not read or written here; it is only handed to the recursive calls.
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
// Addresses exclude the header, buffer positions include it.
final int nodeArrayOrigin = buffer.position() - headerSize;
do { // Scan the linked-list node.
final int nodeArrayHeadPosition = buffer.position() - headerSize;
final int count = readCharGroupCount(buffer);
int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
// NOTE(review): this continue skips the groupOffset update at the bottom of the loop, so
// the groups following a moved group are read with a stale groupOffset - confirm whether
// that is intended.
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
ArrayList<WeightedString> bigrams = null;
if (null != info.mBigrams) {
bigrams = new ArrayList<WeightedString>();
// Bigrams are stored as addresses; resolve each one to the word it points to.
for (PendingAttribute bigram : info.mBigrams) {
final WeightedString word = getWordAtAddress(
buffer, headerSize, bigram.mAddress, options);
final int reconstructedFrequency =
reconstructBigramFrequency(word.mFrequency, bigram.mFrequency);
bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
}
}
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
// Reuse the child array if it has already been read, otherwise read it recursively
// and come back to the current position afterwards.
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
if (null == children) {
final int currentPosition = buffer.position();
buffer.position(info.mChildrenAddress + headerSize);
children = readNodeArray(
buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options);
buffer.position(currentPosition);
}
nodeArrayContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
info.mFrequency,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
} else {
nodeArrayContents.add(
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
info.mFrequency,
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
}
groupOffset = info.mEndAddress;
}
// reach the end of the array.
if (options.mSupportsDynamicUpdate) {
// A 3-byte forward link follows the groups; it is used to continue with the next
// array of the list.
// NOTE(review): every other position set in this method adds headerSize to an address,
// this one uses nextAddress as-is - confirm which coordinate space the link uses.
final int nextAddress = buffer.readUnsignedInt24();
if (nextAddress >= 0 && nextAddress < buffer.limit()) {
buffer.position(nextAddress);
} else {
break;
}
}
} while (options.mSupportsDynamicUpdate &&
buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
// Register the array under its address so later references to it reuse this instance.
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
return nodeArray;
}
/**
* Helper function to get the binary format version from the header.
* @throws IOException
*/
private static int getFormatVersion(final FusionDictionaryBufferInterface buffer)
        throws IOException {
    // The file starts with the magic number; the version is only read when it matches.
    if (FormatSpec.MAGIC_NUMBER != buffer.readInt()) {
        return FormatSpec.NOT_A_VERSION_NUMBER;
    }
    return buffer.readUnsignedShort();
}
/**
* Helper function to get and validate the binary format version.
* @throws UnsupportedFormatException
* @throws IOException
*/
static int checkFormatVersion(final FusionDictionaryBufferInterface buffer)
        throws IOException, UnsupportedFormatException {
    final int version = getFormatVersion(buffer);
    if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
            || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
        // The version can be rejected for being too old, too recent, or not a version at
        // all (wrong magic number), so the message states the whole supported range.
        throw new UnsupportedFormatException("This file has version " + version
                + ", but this implementation only supports versions "
                + FormatSpec.MINIMUM_SUPPORTED_VERSION + " to "
                + FormatSpec.MAXIMUM_SUPPORTED_VERSION);
    }
    return version;
}
/**
* Reads a header from a buffer.
* @param headerReader the header reader
* @throws IOException
* @throws UnsupportedFormatException
*/
public static FileHeader readHeader(final HeaderReaderInterface headerReader)
        throws IOException, UnsupportedFormatException {
    // The fields are read in this fixed order: version, option flags, header size.
    final int version = headerReader.readVersion();
    final int optionsFlags = headerReader.readOptionFlags();
    final int headerSize = headerReader.readHeaderSize();
    if (headerSize < 0) {
        throw new UnsupportedFormatException("header size can't be negative.");
    }
    final HashMap<String, String> attributes = headerReader.readAttributes(headerSize);

    // Decode the individual option bits.
    final boolean germanUmlautProcessing =
            0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG);
    final boolean frenchLigatureProcessing =
            0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG);
    final boolean supportsDynamicUpdate =
            0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE);

    final FusionDictionary.DictionaryOptions dictionaryOptions =
            new FusionDictionary.DictionaryOptions(attributes, germanUmlautProcessing,
                    frenchLigatureProcessing);
    final FormatOptions formatOptions = new FormatOptions(version, supportsDynamicUpdate);
    return new FileHeader(headerSize, dictionaryOptions, formatOptions);
}
/**
* Reads options from a buffer and populate a map with their contents.
*
* The buffer is read at the current position, so the caller must take care the pointer
* is in the right place before calling this.
*/
public static void populateOptions(final FusionDictionaryBufferInterface buffer,
        final int headerSize, final HashMap<String, String> options) {
    // Options are stored as consecutive key/value strings up to the end of the header.
    while (headerSize > buffer.position()) {
        final String optionName = CharEncoding.readString(buffer);
        options.put(optionName, CharEncoding.readString(buffer));
    }
}
/**
* Reads a buffer and returns the memory representation of the dictionary.
*
* This high-level method takes a buffer and reads its contents, populating a
* FusionDictionary structure. The optional dict argument is an existing dictionary to
* which words from the buffer should be added. If it is null, a new dictionary is created.
*
* @param reader the reader.
* @param dict an optional dictionary to add words to, or null.
* @return the created (or merged) dictionary.
*/
@UsedForTesting
public static FusionDictionary readDictionaryBinary(final BinaryDictReader reader,
        final FusionDictionary dict) throws FileNotFoundException, IOException,
        UnsupportedFormatException {
    // if the buffer has not been opened, open the buffer with bytebuffer.
    if (reader.getBuffer() == null) {
        reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
    }
    if (reader.getBuffer() == null) {
        // Without a buffer nothing below can work, so report the failure to the caller
        // here rather than going on to read from a null buffer.
        MakedictLog.e("Cannot open the buffer");
        throw new IOException("Cannot open the buffer");
    }

    // Read header
    final FileHeader fileHeader = readHeader(reader);

    Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
    Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
    final PtNodeArray root = readNodeArray(reader.getBuffer(), fileHeader.mHeaderSize,
            reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions);

    FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
    if (null != dict) {
        // First pass: copy every word of the supplied dictionary into the new one.
        for (final Word w : dict) {
            if (w.mIsBlacklistEntry) {
                newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
            } else {
                newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
            }
        }
        // Second pass: bigrams are set once all the words have been added.
        for (final Word w : dict) {
            // By construction a binary dictionary may not have bigrams pointing to
            // words that are not also registered as unigrams so we don't have to avoid
            // them explicitly here.
            for (final WeightedString bigram : w.mBigrams) {
                newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency);
            }
        }
    }

    return newDict;
}
/**
* Helper method to pass a file name instead of a File object to isBinaryDictionary.
*/
public static boolean isBinaryDictionary(final String filename) {
    // Delegate to the File-based check.
    return isBinaryDictionary(new File(filename));
}
/**
* Basic test to find out whether the file is a binary dictionary or not.
*
* Concretely this only tests the magic number.
*
* @param file The file to test.
* @return true if it's a binary dictionary, false otherwise
*/
public static boolean isBinaryDictionary(final File file) {
FileInputStream inStream = null;
try {
inStream = new FileInputStream(file);
final ByteBuffer buffer = inStream.getChannel().map(
FileChannel.MapMode.READ_ONLY, 0, file.length());
final int version = getFormatVersion(new ByteBufferWrapper(buffer));
return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION
&& version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION);
} catch (FileNotFoundException e) {
return false;
} catch (IOException e) {
return false;
} finally {
if (inStream != null) {
try {
public static final class DictionaryBufferFromReadOnlyByteBufferFactory
        implements DictionaryBufferFactory {
    /**
     * Maps the whole file read-only and wraps the mapping in a dictionary buffer.
     *
     * The stream used to create the mapping is closed before returning.
     */
    @Override
    public DictBuffer getDictionaryBuffer(final File file)
            throws FileNotFoundException, IOException {
        final FileInputStream inStream = new FileInputStream(file);
        final ByteBuffer mapped;
        try {
            mapped = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY,
                    0, file.length());
        } finally {
            inStream.close();
        }
        return null == mapped ? null : new BinaryDictDecoderUtils.ByteBufferDictBuffer(mapped);
    }
}
/**
* Creates DictionaryBuffer using a byte array
*
* This class performs faster than other classes, but consumes more memory.
* When operating on a small dictionary, this class is preferred.
*/
public static final class DictionaryBufferFromByteArrayFactory
implements DictionaryBufferFactory {
@Override
public DictBuffer getDictionaryBuffer(final File file)
throws FileNotFoundException, IOException {
FileInputStream inStream = null;
try {
inStream = new FileInputStream(file);
final byte[] array = new byte[(int) file.length()];
inStream.read(array);
return new ByteArrayDictBuffer(array);
} finally {
if (inStream != null) {
inStream.close();
} catch (IOException e) {
// do nothing
}
}
}
}
/**
* Calculate bigram frequency from compressed value
* Creates DictionaryBuffer using a writable ByteBuffer and a RandomAccessFile.
*
* @param unigramFrequency
* @param bigramFrequency compressed frequency
* @return approximate bigram frequency
* This class doesn't perform as fast as other classes,
* but this class is the only option available for destructive operations (insert or delete)
* on a dictionary.
*/
public static int reconstructBigramFrequency(final int unigramFrequency,
final int bigramFrequency) {
final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)
/ (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f);
return (int)resultFreqFloat;
@UsedForTesting
public static final class DictionaryBufferFromWritableByteBufferFactory
        implements DictionaryBufferFactory {
    /**
     * Maps the whole file in read-write mode and wraps the mapping in a dictionary buffer.
     *
     * The RandomAccessFile used to create the mapping is closed before returning.
     */
    @Override
    public DictBuffer getDictionaryBuffer(final File file)
            throws FileNotFoundException, IOException {
        final RandomAccessFile raFile = new RandomAccessFile(file, "rw");
        final ByteBuffer mapped;
        try {
            mapped = raFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, file.length());
        } finally {
            raFile.close();
        }
        return null == mapped ? null : new BinaryDictDecoderUtils.ByteBufferDictBuffer(mapped);
    }
}
private final File mDictionaryBinaryFile;
private DictBuffer mDictBuffer;
public BinaryDictDecoder(final File file) {
mDictionaryBinaryFile = file;
mDictBuffer = null;
}
public void openDictBuffer(final DictionaryBufferFactory factory)
throws FileNotFoundException, IOException {
mDictBuffer = factory.getDictionaryBuffer(mDictionaryBinaryFile);
}
public DictBuffer getDictBuffer() {
return mDictBuffer;
}
@UsedForTesting
public DictBuffer openAndGetDictBuffer(
final DictionaryBufferFactory factory)
throws FileNotFoundException, IOException {
openDictBuffer(factory);
return getDictBuffer();
}
// The implementation of HeaderReader
@Override
public int readVersion() throws IOException, UnsupportedFormatException {
return BinaryDictDecoderUtils.checkFormatVersion(mDictBuffer);
}
@Override
public int readOptionFlags() {
return mDictBuffer.readUnsignedShort();
}
@Override
public int readHeaderSize() {
return mDictBuffer.readInt();
}
@Override
public HashMap<String, String> readAttributes(final int headerSize) {
    final HashMap<String, String> attributes = new HashMap<String, String>();
    // This loop cannot spin forever: each CharEncoding.readString call moves
    // mDictBuffer.position() forward.
    while (headerSize > mDictBuffer.position()) {
        final String attributeName = CharEncoding.readString(mDictBuffer);
        attributes.put(attributeName, CharEncoding.readString(mDictBuffer));
    }
    // Leave the buffer exactly at the end of the header.
    mDictBuffer.position(headerSize);
    return attributes;
}
}

View file

@ -0,0 +1,777 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.makedict.decoder.HeaderReader;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.TreeMap;
/**
* Decodes binary files for a FusionDictionary.
*
* All the methods in this class are static.
*
* TODO: Remove calls from classes except BinaryDictDecoder
* TODO: Move this file to makedict/internal.
*/
public final class BinaryDictDecoderUtils {
// Local alias of MakedictLog.DBG; gates the debug-only jump check in
// getWordAtAddressWithParentAddress.
private static final boolean DBG = MakedictLog.DBG;
private BinaryDictDecoderUtils() {
// This utility class is not publicly instantiable.
}
// Number of moved-group jumps above which a debug message is logged while resolving a word
// through parent addresses. It is only consulted when DBG is true.
private static final int MAX_JUMPS = 12;
/**
* Abstraction of a positioned byte buffer that dictionary data is read from.
*
* The ByteBuffer-backed implementation in this file returns multi-byte reads as
* non-negative ints (except readInt) and advances the position by the bytes consumed;
* other implementations are presumably expected to do the same -- TODO confirm.
*/
@UsedForTesting
public interface DictBuffer {
// Reads one byte and returns it as a value in [0, 0xFF].
public int readUnsignedByte();
// Reads two bytes and returns them as a value in [0, 0xFFFF].
public int readUnsignedShort();
// Reads three bytes and returns them as a value in [0, 0xFFFFFF].
public int readUnsignedInt24();
// Reads four bytes as a (signed) int.
public int readInt();
// Returns the current position.
public int position();
// Moves the current position to newPosition.
public void position(int newPosition);
// Writes one byte at the current position.
public void put(final byte b);
// Returns the limit of the buffer.
public int limit();
// Returns the capacity of the buffer.
@UsedForTesting
public int capacity();
}
/**
 * A {@link DictBuffer} that delegates every operation to a wrapped {@link ByteBuffer}.
 */
public static final class ByteBufferDictBuffer implements DictBuffer {
    private final ByteBuffer mBuffer;

    /**
     * @param buffer the byte buffer to read from and write to.
     */
    public ByteBufferDictBuffer(final ByteBuffer buffer) {
        mBuffer = buffer;
    }

    @Override
    public int readUnsignedByte() {
        final byte raw = mBuffer.get();
        return raw & 0xFF;
    }

    @Override
    public int readUnsignedShort() {
        final short raw = mBuffer.getShort();
        return raw & 0xFFFF;
    }

    @Override
    public int readUnsignedInt24() {
        // The high byte comes first, followed by the low 16 bits.
        final int highByte = readUnsignedByte();
        final int lowShort = readUnsignedShort();
        return (highByte << 16) + lowShort;
    }

    @Override
    public int readInt() {
        return mBuffer.getInt();
    }

    @Override
    public int position() {
        return mBuffer.position();
    }

    @Override
    public void position(int newPos) {
        mBuffer.position(newPos);
    }

    @Override
    public void put(final byte b) {
        mBuffer.put(b);
    }

    @Override
    public int limit() {
        return mBuffer.limit();
    }

    @Override
    public int capacity() {
        return mBuffer.capacity();
    }
}
/**
* A class grouping utility function for our specific character encoding.
*/
static final class CharEncoding {
private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF;
/**
 * Tells whether a character code lies within the range that is encoded on a single byte,
 * i.e. between MINIMAL_ONE_BYTE_CHARACTER_VALUE and MAXIMAL_ONE_BYTE_CHARACTER_VALUE
 * inclusive.
 */
private static boolean fitsOnOneByte(final int character) {
    if (character < MINIMAL_ONE_BYTE_CHARACTER_VALUE) {
        return false;
    }
    return character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
}
/**
 * Returns the number of bytes a character occupies in the binary encoding.
 *
 * Char format is:
 * 1 byte = bbbbbbbb match
 * case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
 * else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
 *       unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
 *       00011111 would be outside unicode.
 * else: iso-latin-1 code
 * This allows the whole unicode range to be encoded, including chars outside of the BMP.
 * Everything in the iso-latin-1 charset takes only 1 byte, except control characters,
 * which should never happen anyway (they still work, but take 3 bytes).
 *
 * @param character the character code.
 * @return the size in binary encoded-form, either 1 or 3 bytes.
 */
static int getCharSize(final int character) {
    // See char encoding in FusionDictionary.java
    final boolean singleByte = fitsOnOneByte(character)
            || FormatSpec.INVALID_CHARACTER == character;
    return singleByte ? 1 : 3;
}
/**
 * Returns the total encoded size, in bytes, of all the characters in the array.
 */
static int getCharArraySize(final int[] chars) {
    int totalBytes = 0;
    for (int i = 0; i < chars.length; ++i) {
        totalBytes += getCharSize(chars[i]);
    }
    return totalBytes;
}
/**
 * Encodes an array of code points into a byte array. No terminator is written.
 *
 * @param codePoints the code point array to write.
 * @param buffer the byte array to write to.
 * @param index the index in buffer at which the first byte is written.
 * @return the index just after the last byte written.
 */
static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) {
    int cursor = index;
    for (int i = 0; i < codePoints.length; ++i) {
        final int codePoint = codePoints[i];
        if (getCharSize(codePoint) != 1) {
            // Three-byte form, most significant byte first.
            buffer[cursor++] = (byte)(0xFF & (codePoint >> 16));
            buffer[cursor++] = (byte)(0xFF & (codePoint >> 8));
            buffer[cursor++] = (byte)(0xFF & codePoint);
        } else {
            buffer[cursor++] = (byte)codePoint;
        }
    }
    return cursor;
}
/**
 * Encodes a string into a byte array using our character format, followed by the
 * terminator byte.
 *
 * @param buffer the byte array to write to.
 * @param origin the offset in buffer at which writing starts.
 * @param word the string to write.
 * @return the number of bytes written, terminator included.
 */
static int writeString(final byte[] buffer, final int origin,
        final String word) {
    final int charCount = word.length();
    int cursor = origin;
    int charIndex = 0;
    while (charIndex < charCount) {
        final int codePoint = word.codePointAt(charIndex);
        if (getCharSize(codePoint) != 1) {
            // Three-byte form, most significant byte first.
            buffer[cursor++] = (byte)(0xFF & (codePoint >> 16));
            buffer[cursor++] = (byte)(0xFF & (codePoint >> 8));
            buffer[cursor++] = (byte)(0xFF & codePoint);
        } else {
            buffer[cursor++] = (byte)codePoint;
        }
        // Step by code point so that surrogate pairs are consumed as one character.
        charIndex = word.offsetByCodePoints(charIndex, 1);
    }
    buffer[cursor++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR;
    return cursor - origin;
}
/**
 * Encodes a string into a ByteArrayOutputStream using our character format, followed by
 * the terminator byte.
 *
 * @param buffer the ByteArrayOutputStream to write to.
 * @param word the string to write.
 */
static void writeString(final ByteArrayOutputStream buffer, final String word) {
    final int charCount = word.length();
    int charIndex = 0;
    while (charIndex < charCount) {
        final int codePoint = word.codePointAt(charIndex);
        if (getCharSize(codePoint) != 1) {
            // Three-byte form, most significant byte first.
            buffer.write((byte) (0xFF & (codePoint >> 16)));
            buffer.write((byte) (0xFF & (codePoint >> 8)));
            buffer.write((byte) (0xFF & codePoint));
        } else {
            buffer.write((byte) codePoint);
        }
        // Step by code point so that surrogate pairs are consumed as one character.
        charIndex = word.offsetByCodePoints(charIndex, 1);
    }
    buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR);
}
/**
 * Decodes a string from a DictBuffer: characters are read until readChar reports
 * FormatSpec.INVALID_CHARACTER (the terminator). This is the converse of writeString.
 */
static String readString(final DictBuffer dictBuffer) {
    final StringBuilder decoded = new StringBuilder();
    for (int codePoint = readChar(dictBuffer);
            codePoint != FormatSpec.INVALID_CHARACTER;
            codePoint = readChar(dictBuffer)) {
        decoded.appendCodePoint(codePoint);
    }
    return decoded.toString();
}
/**
 * Decodes one character from the buffer.
 *
 * This follows the character format documented earlier in this source file: a byte in
 * the one-byte range is the character itself, the terminator byte yields
 * FormatSpec.INVALID_CHARACTER, and any other first byte is the high byte of a
 * three-byte value.
 *
 * @param dictBuffer the buffer, positioned over an encoded character.
 * @return the character code.
 */
static int readChar(final DictBuffer dictBuffer) {
    final int firstByte = dictBuffer.readUnsignedByte();
    if (fitsOnOneByte(firstByte)) {
        return firstByte;
    }
    if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == firstByte) {
        return FormatSpec.INVALID_CHARACTER;
    }
    return (firstByte << 16) + dictBuffer.readUnsignedShort();
}
}
// Input methods: Read a binary dictionary to memory.
// readDictionaryBinary is the public entry point for them.
/**
 * Reads the children address field of a char group at the current buffer position.
 *
 * When the format supports dynamic update the field is always three bytes: zero means
 * "no children", and the FormatSpec.MSB24 bit marks a negative value whose magnitude is
 * held in the remaining bits. Otherwise the width of the field (none, one, two or three
 * bytes) is selected by the address-type bits of the group flags.
 *
 * @param dictBuffer the buffer, positioned on the children address field.
 * @param optionFlags the flags of the char group being read.
 * @param options file format options.
 * @return the value read, or FormatSpec.NO_CHILDREN_ADDRESS if the group has no children.
 */
static int readChildrenAddress(final DictBuffer dictBuffer,
        final int optionFlags, final FormatOptions options) {
    if (options.mSupportsDynamicUpdate) {
        final int address = dictBuffer.readUnsignedInt24();
        if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
        if ((address & FormatSpec.MSB24) != 0) {
            return -(address & FormatSpec.SINT24_MAX);
        } else {
            return address;
        }
    }
    switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) {
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
            return dictBuffer.readUnsignedByte();
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
            return dictBuffer.readUnsignedShort();
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
            return dictBuffer.readUnsignedInt24();
        case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
        default:
            return FormatSpec.NO_CHILDREN_ADDRESS;
    }
}
/**
 * Reads the parent address field of a char group at the current buffer position.
 *
 * Nothing is read when the format does not support dynamic update. Otherwise the field
 * is three bytes, where the FormatSpec.MSB24 bit marks a negative value and the remaining
 * bits hold the magnitude.
 *
 * @param dictBuffer the buffer, positioned on the parent address field.
 * @param formatOptions file format options.
 * @return the signed value read, or FormatSpec.NO_PARENT_ADDRESS if the format has none.
 */
static int readParentAddress(final DictBuffer dictBuffer,
        final FormatOptions formatOptions) {
    if (!BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
        return FormatSpec.NO_PARENT_ADDRESS;
    }
    final int rawAddress = dictBuffer.readUnsignedInt24();
    final int magnitude = rawAddress & FormatSpec.SINT24_MAX;
    return ((rawAddress & FormatSpec.MSB24) != 0) ? -magnitude : magnitude;
}
// Scratch buffer that readCharGroup fills while reading a multi-character group.
// NOTE(review): it is static and written on every call, so concurrent calls to
// readCharGroup would share it -- confirm callers are single-threaded.
private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH];
/**
* Reads one char group from the buffer, starting at the buffer's current position.
*
* The buffer is read sequentially; originalGroupAddress is not used to seek. It is the
* base from which addressPointer is advanced in step with the bytes consumed, and the
* relative children and bigram offsets stored in the file are resolved against
* addressPointer.
*
* @param dictBuffer the buffer, positioned at the start of a char group.
* @param originalGroupAddress the address to attribute to the start of this group.
* @param options file format options.
* @return the decoded group, including its original address and its end address.
*/
public static CharGroupInfo readCharGroup(final DictBuffer dictBuffer,
final int originalGroupAddress, final FormatOptions options) {
int addressPointer = originalGroupAddress;
final int flags = dictBuffer.readUnsignedByte();
++addressPointer;
final int parentAddress = readParentAddress(dictBuffer, options);
// readParentAddress consumes three bytes only when dynamic update is supported.
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
addressPointer += 3;
}
final int characters[];
if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
int index = 0;
int character = CharEncoding.readChar(dictBuffer);
addressPointer += CharEncoding.getCharSize(character);
// readChar returns FormatSpec.INVALID_CHARACTER on the terminator; this loop compares
// against the literal -1 -- presumably the same value, TODO confirm. The size of the
// terminator is added to addressPointer too, since getCharSize is applied to it.
while (-1 != character) {
// FusionDictionary is making sure that the length of the word is smaller than
// MAX_WORD_LENGTH.
// So we'll never write past the end of CHARACTER_BUFFER.
CHARACTER_BUFFER[index++] = character;
character = CharEncoding.readChar(dictBuffer);
addressPointer += CharEncoding.getCharSize(character);
}
characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index);
} else {
// Single-character group: one character and no terminator.
final int character = CharEncoding.readChar(dictBuffer);
addressPointer += CharEncoding.getCharSize(character);
characters = new int[] { character };
}
final int frequency;
if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
++addressPointer;
frequency = dictBuffer.readUnsignedByte();
} else {
frequency = CharGroup.NOT_A_TERMINAL;
}
int childrenAddress = readChildrenAddress(dictBuffer, flags, options);
// The stored value is an offset from the position of the children address field itself,
// so it is resolved before addressPointer is moved past that field.
if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
childrenAddress += addressPointer;
}
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
ArrayList<WeightedString> shortcutTargets = null;
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
// The shortcut list is measured through the buffer position rather than tracked byte
// by byte.
final int pointerBefore = dictBuffer.position();
shortcutTargets = new ArrayList<WeightedString>();
dictBuffer.readUnsignedShort(); // Skip the size
while (true) {
final int targetFlags = dictBuffer.readUnsignedByte();
final String word = CharEncoding.readString(dictBuffer);
shortcutTargets.add(new WeightedString(word,
targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY));
if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
}
addressPointer += dictBuffer.position() - pointerBefore;
}
ArrayList<PendingAttribute> bigrams = null;
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
bigrams = new ArrayList<PendingAttribute>();
int bigramCount = 0;
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
final int bigramFlags = dictBuffer.readUnsignedByte();
++addressPointer;
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE)
? 1 : -1;
// The bigram offset is relative to the position just after the bigram flags byte.
int bigramAddress = addressPointer;
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
bigramAddress += sign * dictBuffer.readUnsignedByte();
addressPointer += 1;
break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
bigramAddress += sign * dictBuffer.readUnsignedShort();
addressPointer += 2;
break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
final int offset = (dictBuffer.readUnsignedByte() << 16)
+ dictBuffer.readUnsignedShort();
bigramAddress += sign * offset;
addressPointer += 3;
break;
default:
throw new RuntimeException("Has bigrams with no address");
}
bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY,
bigramAddress));
if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
}
// NOTE(review): bigramCount equals MAX_BIGRAMS_IN_A_GROUP here when exactly that many
// bigrams were read and the last one had no HAS_NEXT flag, so this message is also
// logged in that case -- confirm whether that is intended.
if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
MakedictLog.d("too many bigrams in a group.");
}
}
return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
parentAddress, childrenAddress, shortcutTargets, bigrams);
}
/**
 * Reads the char group count at the current position and moves the buffer past it.
 *
 * The count takes one byte when the first byte does not exceed
 * FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT. Otherwise the masked first byte
 * forms the high bits and a second byte supplies the low eight bits.
 */
public static int readCharGroupCount(final DictBuffer dictBuffer) {
    final int firstByte = dictBuffer.readUnsignedByte();
    if (firstByte > FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT) {
        final int highBits =
                (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & firstByte) << 8;
        return highBits + dictBuffer.readUnsignedByte();
    }
    return firstByte;
}
/**
 * Finds, as a string, the word at the address passed as an argument.
 *
 * The buffer position on entry is saved and put back before returning normally.
 *
 * @param dictBuffer the buffer to read from.
 * @param headerSize the size of the header.
 * @param address the address to seek.
 * @param formatOptions file format options.
 * @return the word with its frequency, as a weighted string.
 */
/* package for tests */ static WeightedString getWordAtAddress(
        final DictBuffer dictBuffer, final int headerSize, final int address,
        final FormatOptions formatOptions) {
    final int savedPosition = dictBuffer.position();
    dictBuffer.position(address);
    // Formats with dynamic update store parent addresses, which allows walking up from
    // the target group; other formats have to be searched from the root.
    final WeightedString word = BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)
            ? getWordAtAddressWithParentAddress(dictBuffer, headerSize, address,
                    formatOptions)
            : getWordAtAddressWithoutParentAddress(dictBuffer, headerSize, address,
                    formatOptions);
    dictBuffer.position(savedPosition);
    return word;
}
/**
 * Rebuilds the word ending at the given address by following parent addresses upwards.
 *
 * Starting from the group at {@code address}, the characters of each group are prepended
 * to the word, then the walk continues at mParentAddress + mOriginalAddress until a group
 * reports FormatSpec.NO_PARENT_ADDRESS or FormatSpec.MAX_WORD_LENGTH groups were visited.
 * A group flagged as moved is not used: the same sum is followed and the group found
 * there is read instead. The frequency returned is that of the first group used.
 *
 * @param dictBuffer the buffer to read from.
 * @param headerSize the size of the header.
 * @param address the address of the last group of the word.
 * @param options file format options.
 * @return the word with its frequency, as a weighted string.
 */
@SuppressWarnings("unused")
private static WeightedString getWordAtAddressWithParentAddress(
        final DictBuffer dictBuffer, final int headerSize, final int address,
        final FormatOptions options) {
    final StringBuilder word = new StringBuilder();
    int frequency = Integer.MIN_VALUE;
    int groupAddress = address;
    // the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
    int depth = 0;
    while (depth < FormatSpec.MAX_WORD_LENGTH) {
        CharGroupInfo info;
        int jumps = 0;
        do {
            dictBuffer.position(groupAddress + headerSize);
            info = readCharGroup(dictBuffer, groupAddress, options);
            if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) {
                groupAddress = info.mParentAddress + info.mOriginalAddress;
            }
            // Debug-only check: it logs but does not stop the loop.
            if (DBG && jumps++ > MAX_JUMPS) {
                MakedictLog.d("Too many jumps - probably a bug");
            }
        } while (BinaryDictIOUtils.isMovedGroup(info.mFlags, options));
        if (frequency == Integer.MIN_VALUE) {
            frequency = info.mFrequency;
        }
        final String groupText = new String(info.mCharacters, 0, info.mCharacters.length);
        word.insert(0, groupText);
        if (FormatSpec.NO_PARENT_ADDRESS == info.mParentAddress) {
            break;
        }
        groupAddress = info.mParentAddress + info.mOriginalAddress;
        ++depth;
    }
    return new WeightedString(word.toString(), frequency);
}
/**
* Finds the word ending at the given address by searching down from the root node array.
*
* Groups are scanned in order. The scan stops when a group whose original address equals
* {@code address} is found. Otherwise children addresses decide which group to descend
* into: the last group with children seen before one whose children address exceeds the
* target, or the last group with children once the end of the array is reached. The
* characters of each group descended through are appended to the word.
*
* @param dictBuffer the buffer to read from; it is repositioned by this method.
* @param headerSize the size of the header.
* @param address the address of the last group of the word.
* @param options file format options.
* @return the word with its frequency, or null if no group at that address was found.
*/
private static WeightedString getWordAtAddressWithoutParentAddress(
final DictBuffer dictBuffer, final int headerSize, final int address,
final FormatOptions options) {
dictBuffer.position(headerSize);
final int count = readCharGroupCount(dictBuffer);
int groupOffset = BinaryDictIOUtils.getGroupCountSize(count);
final StringBuilder builder = new StringBuilder();
WeightedString result = null;
// The most recent group with children in the array being scanned; reset on each descent.
CharGroupInfo last = null;
for (int i = count - 1; i >= 0; --i) {
CharGroupInfo info = readCharGroup(dictBuffer, groupOffset, options);
groupOffset = info.mEndAddress;
if (info.mOriginalAddress == address) {
builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
result = new WeightedString(builder.toString(), info.mFrequency);
break; // and return
}
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
if (info.mChildrenAddress > address) {
// This group's children start past the target, so the target has to be below the
// previous group with children, if there is one.
if (null == last) continue;
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
dictBuffer.position(last.mChildrenAddress + headerSize);
// i is reloaded with the child array's count; the loop's --i then makes it count - 1,
// matching the initialization used for the root array.
i = readCharGroupCount(dictBuffer);
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
last = null;
continue;
}
last = info;
}
// End of the array without a match: descend into the last group with children.
// NOTE(review): last is dereferenced without a null check; it is null when no group
// with children was seen in this array -- confirm this cannot happen on valid input.
if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
dictBuffer.position(last.mChildrenAddress + headerSize);
i = readCharGroupCount(dictBuffer);
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
last = null;
continue;
}
}
return result;
}
/**
 * Reads a single node array from a buffer.
 *
 * This method reads the file at the current position. A node array is fully expected to
 * start at the current position.
 * This will recursively read other node arrays into the structure, recording each node
 * array that was read in reverseNodeArrayMap and using that map to avoid reading the same
 * node array twice. When the format supports dynamic update, the forward link at the end
 * of each array is followed and the groups of the linked arrays are added to the same
 * node array.
 *
 * @param dictBuffer the buffer, correctly positioned at the start of a node array.
 * @param headerSize the size, in bytes, of the file header.
 * @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
 * @param reverseGroupMap a mapping from addresses to already read character groups. It is
 *        only handed on to recursive calls here.
 * @param options file format options.
 * @return the read node array with all its children already read.
 */
private static PtNodeArray readNodeArray(final DictBuffer dictBuffer,
        final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
        final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
        throws IOException {
    final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
    final int nodeArrayOrigin = dictBuffer.position() - headerSize;
    do { // Scan the linked-list node.
        final int nodeArrayHeadPosition = dictBuffer.position() - headerSize;
        final int count = readCharGroupCount(dictBuffer);
        int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
        for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
            CharGroupInfo info = readCharGroup(dictBuffer, groupOffset, options);
            // readCharGroup has already moved the buffer past this group, so the address
            // of the next group must be advanced for every group, including the moved
            // ones skipped just below. Otherwise the groups that follow a moved group
            // would resolve their relative addresses against a stale base.
            groupOffset = info.mEndAddress;
            if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
            ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
            ArrayList<WeightedString> bigrams = null;
            if (null != info.mBigrams) {
                bigrams = new ArrayList<WeightedString>();
                for (PendingAttribute bigram : info.mBigrams) {
                    final WeightedString word = getWordAtAddress(
                            dictBuffer, headerSize, bigram.mAddress, options);
                    final int reconstructedFrequency =
                            BinaryDictIOUtils.reconstructBigramFrequency(word.mFrequency,
                                    bigram.mFrequency);
                    bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
                }
            }
            if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
                PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
                if (null == children) {
                    // Not read yet: read the children now, then come back to this array.
                    final int currentPosition = dictBuffer.position();
                    dictBuffer.position(info.mChildrenAddress + headerSize);
                    children = readNodeArray(dictBuffer, headerSize, reverseNodeArrayMap,
                            reverseGroupMap, options);
                    dictBuffer.position(currentPosition);
                }
                nodeArrayContents.add(
                        new CharGroup(info.mCharacters, shortcutTargets, bigrams,
                                info.mFrequency,
                                0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
                                0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
            } else {
                nodeArrayContents.add(
                        new CharGroup(info.mCharacters, shortcutTargets, bigrams,
                                info.mFrequency,
                                0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
                                0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
            }
        }

        // reach the end of the array.
        if (options.mSupportsDynamicUpdate) {
            final int nextAddress = dictBuffer.readUnsignedInt24();
            if (nextAddress >= 0 && nextAddress < dictBuffer.limit()) {
                dictBuffer.position(nextAddress);
            } else {
                break;
            }
        }
    } while (options.mSupportsDynamicUpdate &&
            dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);

    final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
    nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
    nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
    reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
    return nodeArray;
}
/**
 * Helper function to get the binary format version from the header.
 *
 * Reads an int and compares it with FormatSpec.MAGIC_NUMBER. The version is read, as an
 * unsigned short, only when the magic number matches.
 *
 * @return the version, or FormatSpec.NOT_A_VERSION_NUMBER if the magic number is wrong.
 * @throws IOException
 */
private static int getFormatVersion(final DictBuffer dictBuffer)
        throws IOException {
    if (dictBuffer.readInt() != FormatSpec.MAGIC_NUMBER) {
        return FormatSpec.NOT_A_VERSION_NUMBER;
    }
    return dictBuffer.readUnsignedShort();
}
/**
 * Helper function to get and validate the binary format version.
 *
 * The version is read with getFormatVersion, so a file with a wrong magic number is
 * reported with the FormatSpec.NOT_A_VERSION_NUMBER value.
 *
 * @param dictBuffer the buffer, positioned at the start of the file.
 * @return the version, when it is within the supported range.
 * @throws UnsupportedFormatException if the version is below
 *         FormatSpec.MINIMUM_SUPPORTED_VERSION or above FormatSpec.MAXIMUM_SUPPORTED_VERSION.
 * @throws IOException
 */
static int checkFormatVersion(final DictBuffer dictBuffer)
        throws IOException, UnsupportedFormatException {
    final int version = getFormatVersion(dictBuffer);
    if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
            || version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
        // The message states the whole range because the version may be too low as well
        // as too high.
        throw new UnsupportedFormatException("This file has version " + version
                + ", but this implementation only supports versions "
                + FormatSpec.MINIMUM_SUPPORTED_VERSION + " to "
                + FormatSpec.MAXIMUM_SUPPORTED_VERSION);
    }
    return version;
}
/**
 * Reads a file header through a header reader.
 *
 * The version, the option flags, the header size and the attributes are read in that
 * order, then assembled into a FileHeader.
 *
 * @param headerReader the header reader
 * @return the header that was read.
 * @throws IOException
 * @throws UnsupportedFormatException if the header size read is negative.
 */
public static FileHeader readHeader(final HeaderReader headerReader)
        throws IOException, UnsupportedFormatException {
    final int version = headerReader.readVersion();
    final int optionsFlags = headerReader.readOptionFlags();
    final int headerSize = headerReader.readHeaderSize();
    if (headerSize < 0) {
        throw new UnsupportedFormatException("header size can't be negative.");
    }
    final HashMap<String, String> attributes = headerReader.readAttributes(headerSize);

    final boolean germanUmlautProcessing =
            0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG);
    final boolean frenchLigatureProcessing =
            0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG);
    final boolean supportsDynamicUpdate =
            0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE);

    final FusionDictionary.DictionaryOptions dictionaryOptions =
            new FusionDictionary.DictionaryOptions(attributes, germanUmlautProcessing,
                    frenchLigatureProcessing);
    final FormatOptions formatOptions = new FormatOptions(version, supportsDynamicUpdate);
    return new FileHeader(headerSize, dictionaryOptions, formatOptions);
}
/**
 * Reads options from a buffer and populates a map with their contents.
 *
 * Key/value string pairs are read from the current position until the position reaches
 * headerSize, so the caller must take care the pointer is in the right place before
 * calling this.
 */
public static void populateOptions(final DictBuffer dictBuffer,
        final int headerSize, final HashMap<String, String> options) {
    for (int pos = dictBuffer.position(); pos < headerSize; pos = dictBuffer.position()) {
        final String name = CharEncoding.readString(dictBuffer);
        options.put(name, CharEncoding.readString(dictBuffer));
    }
}
/**
 * Reads a binary dictionary and returns its memory representation.
 *
 * This high-level method reads the header and the node arrays through the decoder and
 * builds a new FusionDictionary from them. If the optional dict argument is not null,
 * every word of dict is then added to the new dictionary (blacklist entries and regular
 * words first, bigrams in a second pass).
 *
 * NOTE(review): when the buffer is still null after trying to open it, an error is only
 * logged and decoding goes on -- confirm whether failing early is preferable.
 *
 * @param dictDecoder the dict decoder.
 * @param dict an optional dictionary whose words are added to the result, or null.
 * @return the created (or merged) dictionary.
 */
@UsedForTesting
public static FusionDictionary readDictionaryBinary(final BinaryDictDecoder dictDecoder,
        final FusionDictionary dict) throws FileNotFoundException, IOException,
        UnsupportedFormatException {
    // if the buffer has not been opened, open the buffer with bytebuffer.
    if (null == dictDecoder.getDictBuffer()) {
        dictDecoder.openDictBuffer(
                new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
    }
    if (null == dictDecoder.getDictBuffer()) {
        MakedictLog.e("Cannot open the buffer");
    }

    // Read header
    final FileHeader header = readHeader(dictDecoder);

    final Map<Integer, PtNodeArray> nodeArraysByAddress = new TreeMap<Integer, PtNodeArray>();
    final Map<Integer, CharGroup> groupsByAddress = new TreeMap<Integer, CharGroup>();
    final PtNodeArray root = readNodeArray(dictDecoder.getDictBuffer(), header.mHeaderSize,
            nodeArraysByAddress, groupsByAddress, header.mFormatOptions);

    final FusionDictionary merged = new FusionDictionary(root, header.mDictionaryOptions);
    if (null == dict) {
        return merged;
    }
    // First pass: unigrams, so that every bigram target exists before the second pass.
    for (final Word w : dict) {
        if (!w.mIsBlacklistEntry) {
            merged.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
        } else {
            merged.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
        }
    }
    for (final Word w : dict) {
        // By construction a binary dictionary may not have bigrams pointing to
        // words that are not also registered as unigrams so we don't have to avoid
        // them explicitly here.
        for (final WeightedString bigram : w.mBigrams) {
            merged.setBigram(w.mWord, bigram.mWord, bigram.mFrequency);
        }
    }
    return merged;
}
/**
 * Convenience overload of isBinaryDictionary that takes a file name instead of a File.
 */
public static boolean isBinaryDictionary(final String filename) {
    return isBinaryDictionary(new File(filename));
}
/**
 * Basic test to find out whether the file is a binary dictionary or not.
 *
 * Concretely this tests the magic number and checks that the version which follows it is
 * within the supported range. A file that cannot be opened or mapped, or that is too
 * short to hold the magic number and the version, is reported as not being a dictionary.
 *
 * @param file The file to test.
 * @return true if it's a binary dictionary, false otherwise
 */
public static boolean isBinaryDictionary(final File file) {
    FileInputStream inStream = null;
    try {
        inStream = new FileInputStream(file);
        final ByteBuffer buffer = inStream.getChannel().map(
                FileChannel.MapMode.READ_ONLY, 0, file.length());
        final int version = getFormatVersion(new ByteBufferDictBuffer(buffer));
        return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION
                && version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION);
    } catch (FileNotFoundException e) {
        return false;
    } catch (IOException e) {
        return false;
    } catch (java.nio.BufferUnderflowException e) {
        // The mapped file ends before the magic number and the version could be read
        // (e.g. an empty file), so it cannot be a binary dictionary.
        return false;
    } finally {
        if (inStream != null) {
            try {
                inStream.close();
            } catch (IOException e) {
                // do nothing
            }
        }
    }
}
}

View file

@ -16,7 +16,7 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;

View file

@ -18,13 +18,13 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
import java.io.File;
import java.io.FileInputStream;
@ -62,7 +62,7 @@ public final class BinaryDictIOUtils {
* Retrieves all node arrays without recursive call.
*/
private static void readUnigramsAndBigramsBinaryInner(
final FusionDictionaryBufferInterface buffer, final int headerSize,
final DictBuffer dictBuffer, final int headerSize,
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
final Map<Integer, ArrayList<PendingAttribute>> bigrams,
final FormatOptions formatOptions) {
@ -82,11 +82,11 @@ public final class BinaryDictIOUtils {
p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength);
}
if (buffer.position() != p.mAddress) buffer.position(p.mAddress);
if (dictBuffer.position() != p.mAddress) dictBuffer.position(p.mAddress);
if (index != p.mLength) index = p.mLength;
if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) {
p.mNumOfCharGroup = BinaryDictDecoder.readCharGroupCount(buffer);
p.mNumOfCharGroup = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
p.mAddress += getGroupCountSize(p.mNumOfCharGroup);
p.mPosition = 0;
}
@ -94,7 +94,7 @@ public final class BinaryDictIOUtils {
stack.pop();
continue;
}
CharGroupInfo info = BinaryDictDecoder.readCharGroup(buffer,
CharGroupInfo info = BinaryDictDecoderUtils.readCharGroup(dictBuffer,
p.mAddress - headerSize, formatOptions);
for (int i = 0; i < info.mCharacters.length; ++i) {
pushedChars[index++] = info.mCharacters[i];
@ -114,7 +114,7 @@ public final class BinaryDictIOUtils {
if (p.mPosition == p.mNumOfCharGroup) {
if (formatOptions.mSupportsDynamicUpdate) {
final int forwardLinkAddress = buffer.readUnsignedInt24();
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
// The node array has a forward link.
p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT;
@ -127,7 +127,7 @@ public final class BinaryDictIOUtils {
}
} else {
// The node array has more groups.
p.mAddress = buffer.position();
p.mAddress = dictBuffer.position();
}
if (!isMovedGroup && hasChildrenAddress(info.mChildrenAddress)) {
@ -141,20 +141,20 @@ public final class BinaryDictIOUtils {
* Reads unigrams and bigrams from the binary file.
* Doesn't store a full memory representation of the dictionary.
*
* @param dictReader the dict reader.
* @param dictDecoder the dict decoder.
* @param words the map to store the address as a key and the word as a value.
* @param frequencies the map to store the address as a key and the frequency as a value.
* @param bigrams the map to store the address as a key and the list of address as a value.
* @throws IOException if the file can't be read.
* @throws UnsupportedFormatException if the format of the file is not recognized.
*/
public static void readUnigramsAndBigramsBinary(final BinaryDictReader dictReader,
public static void readUnigramsAndBigramsBinary(final BinaryDictDecoder dictDecoder,
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
UnsupportedFormatException {
// Read header
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
readUnigramsAndBigramsBinaryInner(dictReader.getBuffer(), header.mHeaderSize, words,
final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
readUnigramsAndBigramsBinaryInner(dictDecoder.getDictBuffer(), header.mHeaderSize, words,
frequencies, bigrams, header.mFormatOptions);
}
@ -162,32 +162,32 @@ public final class BinaryDictIOUtils {
* Gets the address of the last CharGroup of the exact matching word in the dictionary.
* If no match is found, returns NOT_VALID_WORD.
*
* @param dictReader the dict reader.
* @param dictDecoder the dict decoder.
* @param word the word we search for.
* @return the address of the terminal node.
* @throws IOException if the file can't be read.
* @throws UnsupportedFormatException if the format of the file is not recognized.
*/
@UsedForTesting
public static int getTerminalPosition(final BinaryDictReader dictReader,
public static int getTerminalPosition(final BinaryDictDecoder dictDecoder,
final String word) throws IOException, UnsupportedFormatException {
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
if (word == null) return FormatSpec.NOT_VALID_WORD;
if (buffer.position() != 0) buffer.position(0);
if (dictBuffer.position() != 0) dictBuffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
int wordPos = 0;
final int wordLen = word.codePointCount(0, word.length());
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
do {
final int charGroupCount = BinaryDictDecoder.readCharGroupCount(buffer);
final int charGroupCount = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
boolean foundNextCharGroup = false;
for (int i = 0; i < charGroupCount; ++i) {
final int charGroupPos = buffer.position();
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer,
buffer.position(), header.mFormatOptions);
final int charGroupPos = dictBuffer.position();
final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(
dictBuffer, dictBuffer.position(), header.mFormatOptions);
final boolean isMovedGroup = isMovedGroup(currentInfo.mFlags,
header.mFormatOptions);
final boolean isDeletedGroup = isDeletedGroup(currentInfo.mFlags,
@ -219,7 +219,7 @@ public final class BinaryDictIOUtils {
return FormatSpec.NOT_VALID_WORD;
}
foundNextCharGroup = true;
buffer.position(currentInfo.mChildrenAddress);
dictBuffer.position(currentInfo.mChildrenAddress);
break;
}
}
@ -233,11 +233,11 @@ public final class BinaryDictIOUtils {
return FormatSpec.NOT_VALID_WORD;
}
final int forwardLinkAddress = buffer.readUnsignedInt24();
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
return FormatSpec.NOT_VALID_WORD;
}
buffer.position(forwardLinkAddress);
dictBuffer.position(forwardLinkAddress);
} while(true);
}
return FormatSpec.NOT_VALID_WORD;
@ -246,12 +246,12 @@ public final class BinaryDictIOUtils {
/**
* @return the size written, in bytes. Always 3 bytes.
*/
static int writeSInt24ToBuffer(final FusionDictionaryBufferInterface buffer,
static int writeSInt24ToBuffer(final DictBuffer dictBuffer,
final int value) {
final int absValue = Math.abs(value);
buffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
buffer.put((byte)((absValue >> 8) & 0xFF));
buffer.put((byte)(absValue & 0xFF));
dictBuffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
dictBuffer.put((byte)((absValue >> 8) & 0xFF));
dictBuffer.put((byte)(absValue & 0xFF));
return 3;
}
@ -289,31 +289,31 @@ public final class BinaryDictIOUtils {
return BinaryDictEncoder.getByteSize(value);
}
static void skipCharGroup(final FusionDictionaryBufferInterface buffer,
static void skipCharGroup(final DictBuffer dictBuffer,
final FormatOptions formatOptions) {
final int flags = buffer.readUnsignedByte();
BinaryDictDecoder.readParentAddress(buffer, formatOptions);
skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
BinaryDictDecoder.readChildrenAddress(buffer, flags, formatOptions);
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte();
final int flags = dictBuffer.readUnsignedByte();
BinaryDictDecoderUtils.readParentAddress(dictBuffer, formatOptions);
skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
BinaryDictDecoderUtils.readChildrenAddress(dictBuffer, flags, formatOptions);
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) dictBuffer.readUnsignedByte();
if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) {
final int shortcutsSize = buffer.readUnsignedShort();
buffer.position(buffer.position() + shortcutsSize
final int shortcutsSize = dictBuffer.readUnsignedShort();
dictBuffer.position(dictBuffer.position() + shortcutsSize
- FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE);
}
if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) {
int bigramCount = 0;
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
final int bigramFlags = buffer.readUnsignedByte();
final int bigramFlags = dictBuffer.readUnsignedByte();
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
buffer.readUnsignedByte();
dictBuffer.readUnsignedByte();
break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
buffer.readUnsignedShort();
dictBuffer.readUnsignedShort();
break;
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
buffer.readUnsignedInt24();
dictBuffer.readUnsignedInt24();
break;
}
if ((bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT) == 0) break;
@ -324,15 +324,15 @@ public final class BinaryDictIOUtils {
}
}
static void skipString(final FusionDictionaryBufferInterface buffer,
static void skipString(final DictBuffer dictBuffer,
final boolean hasMultipleChars) {
if (hasMultipleChars) {
int character = CharEncoding.readChar(buffer);
int character = CharEncoding.readChar(dictBuffer);
while (character != FormatSpec.INVALID_CHARACTER) {
character = CharEncoding.readChar(buffer);
character = CharEncoding.readChar(dictBuffer);
}
} else {
CharEncoding.readChar(buffer);
CharEncoding.readChar(dictBuffer);
}
}
@ -508,24 +508,25 @@ public final class BinaryDictIOUtils {
}
/**
* Find a word using the BinaryDictReader.
* Find a word using the BinaryDictDecoder.
*
* @param dictReader the dict reader
* @param dictDecoder the dict decoder
* @param word the word searched
* @return the found group
* @throws IOException
* @throws UnsupportedFormatException
*/
@UsedForTesting
public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader dictReader,
public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictDecoder dictDecoder,
final String word) throws IOException, UnsupportedFormatException {
int position = getTerminalPosition(dictReader, word);
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
int position = getTerminalPosition(dictDecoder, word);
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
if (position != FormatSpec.NOT_VALID_WORD) {
buffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
buffer.position(position);
return BinaryDictDecoder.readCharGroup(buffer, position, header.mFormatOptions);
dictBuffer.position(0);
final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
dictBuffer.position(position);
return BinaryDictDecoderUtils.readCharGroup(dictBuffer, position,
header.mFormatOptions);
}
return null;
}
@ -544,21 +545,21 @@ public final class BinaryDictIOUtils {
final File file, final long offset, final long length)
throws FileNotFoundException, IOException, UnsupportedFormatException {
final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE];
final BinaryDictReader dictReader = new BinaryDictReader(file);
dictReader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFactory() {
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
dictDecoder.openDictBuffer(new BinaryDictDecoder.DictionaryBufferFactory() {
@Override
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(File file)
public DictBuffer getDictionaryBuffer(File file)
throws FileNotFoundException, IOException {
final FileInputStream inStream = new FileInputStream(file);
try {
inStream.read(buffer);
return new ByteArrayWrapper(buffer);
return new ByteArrayDictBuffer(buffer);
} finally {
inStream.close();
}
}
});
return BinaryDictDecoder.readHeader(dictReader);
return BinaryDictDecoderUtils.readHeader(dictDecoder);
}
public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset,
@ -636,4 +637,19 @@ public final class BinaryDictIOUtils {
return 0;
}
}
/**
 * Expands a compressed bigram frequency back into an approximate frequency.
 *
 * The gap between the unigram frequency and FormatSpec.MAX_TERMINAL_FREQUENCY is divided
 * into (1.5 + FormatSpec.MAX_BIGRAM_FREQUENCY) equal steps. The result sits
 * (bigramFrequency + 1) steps above the unigram frequency and is truncated to an int.
 *
 * @param unigramFrequency the unigram frequency the result is computed relative to
 * @param bigramFrequency the compressed bigram frequency
 * @return the approximate bigram frequency
 */
public static int reconstructBigramFrequency(final int unigramFrequency,
        final int bigramFrequency) {
    // The 1.5f literal keeps the whole computation in float arithmetic.
    final float stepCount = 1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY;
    final float step = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency) / stepCount;
    final float stepsAboveUnigram = bigramFrequency + 1.0f;
    return (int) (unigramFrequency + step * stepsAboveUnigram);
}
}

View file

@ -1,169 +0,0 @@
/*
* Copyright (C) 2013 The Android Open Source Project
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.HashMap;
/**
 * Reads the header fields of a binary dictionary file through a buffer.
 *
 * The buffer is not created by the constructor. A {@link FusionDictionaryBufferFactory} must
 * be handed to {@link #openBuffer} (or {@link #openAndGetBuffer}) first; until then
 * {@link #getBuffer()} returns null, and the header reading methods dereference the buffer,
 * so they must only be called after it has been opened.
 */
public class BinaryDictReader implements HeaderReaderInterface {

    /**
     * Creates the buffer through which a dictionary file is read.
     */
    public interface FusionDictionaryBufferFactory {
        public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
                throws FileNotFoundException, IOException;
    }

    /**
     * Creates FusionDictionaryBuffer from a ByteBuffer
     */
    public static final class FusionDictionaryBufferFromByteBufferFactory
            implements FusionDictionaryBufferFactory {
        /**
         * Maps the whole file read-only and wraps the mapping.
         *
         * @param file the dictionary file to map.
         * @return a buffer backed by the mapping.
         * @throws FileNotFoundException if the file can't be opened.
         * @throws IOException if the file can't be mapped or closed.
         */
        @Override
        public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
                throws FileNotFoundException, IOException {
            FileInputStream inStream = null;
            ByteBuffer buffer = null;
            try {
                inStream = new FileInputStream(file);
                buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY,
                        0, file.length());
            } finally {
                // The stream is closed as soon as the mapping has been requested.
                if (inStream != null) {
                    inStream.close();
                }
            }
            if (buffer != null) {
                return new BinaryDictDecoder.ByteBufferWrapper(buffer);
            }
            return null;
        }
    }

    /**
     * Creates FusionDictionaryBuffer from a byte array
     */
    public static final class FusionDictionaryBufferFromByteArrayFactory
            implements FusionDictionaryBufferFactory {
        /**
         * Reads the file into a byte array sized from {@code file.length()} and wraps it.
         *
         * @param file the dictionary file to read.
         * @return a buffer backed by the array.
         * @throws FileNotFoundException if the file can't be opened.
         * @throws IOException if the file can't be read or closed.
         */
        @Override
        public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
                throws FileNotFoundException, IOException {
            FileInputStream inStream = null;
            try {
                inStream = new FileInputStream(file);
                final byte[] array = new byte[(int) file.length()];
                // A single read() may return fewer bytes than requested, so keep reading
                // until the array is full or the stream ends.
                int filled = 0;
                while (filled < array.length) {
                    final int count = inStream.read(array, filled, array.length - filled);
                    if (count < 0) {
                        break;
                    }
                    filled += count;
                }
                return new ByteArrayWrapper(array);
            } finally {
                if (inStream != null) {
                    inStream.close();
                }
            }
        }
    }

    /**
     * Creates FusionDictionaryBuffer from a RandomAccessFile.
     */
    @UsedForTesting
    public static final class FusionDictionaryBufferFromWritableByteBufferFactory
            implements FusionDictionaryBufferFactory {
        /**
         * Opens the file in "rw" mode, maps all of it read-write and wraps the mapping.
         *
         * @param file the dictionary file to map.
         * @return a buffer backed by the mapping.
         * @throws FileNotFoundException if the file can't be opened.
         * @throws IOException if the file can't be mapped or closed.
         */
        @Override
        public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
                throws FileNotFoundException, IOException {
            RandomAccessFile raFile = null;
            ByteBuffer buffer = null;
            try {
                raFile = new RandomAccessFile(file, "rw");
                buffer = raFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0,
                        file.length());
            } finally {
                // The file is closed as soon as the mapping has been requested.
                if (raFile != null) {
                    raFile.close();
                }
            }
            if (buffer != null) {
                return new BinaryDictDecoder.ByteBufferWrapper(buffer);
            }
            return null;
        }
    }

    private final File mDictionaryBinaryFile;
    // Null until openBuffer() has been called.
    private FusionDictionaryBufferInterface mFusionDictionaryBuffer;

    /**
     * @param file the dictionary file; it is not opened until {@link #openBuffer} is called.
     */
    public BinaryDictReader(final File file) {
        mDictionaryBinaryFile = file;
        mFusionDictionaryBuffer = null;
    }

    /**
     * Creates the buffer for the dictionary file with the given factory and keeps it.
     *
     * @param factory the factory used to create the buffer.
     * @throws FileNotFoundException if the factory can't open the file.
     * @throws IOException if the factory can't read the file.
     */
    public void openBuffer(final FusionDictionaryBufferFactory factory)
            throws FileNotFoundException, IOException {
        mFusionDictionaryBuffer = factory.getFusionDictionaryBuffer(mDictionaryBinaryFile);
    }

    /**
     * @return the buffer created by the last {@link #openBuffer} call, or null if none.
     */
    public FusionDictionaryBufferInterface getBuffer() {
        return mFusionDictionaryBuffer;
    }

    /**
     * Convenience for {@link #openBuffer} followed by {@link #getBuffer}.
     */
    @UsedForTesting
    public FusionDictionaryBufferInterface openAndGetBuffer(
            final FusionDictionaryBufferFactory factory)
            throws FileNotFoundException, IOException {
        openBuffer(factory);
        return getBuffer();
    }

    // The implementation of HeaderReaderInterface

    /**
     * Delegates to {@code BinaryDictDecoder.checkFormatVersion} on the opened buffer.
     */
    @Override
    public int readVersion() throws IOException, UnsupportedFormatException {
        return BinaryDictDecoder.checkFormatVersion(mFusionDictionaryBuffer);
    }

    /**
     * Reads an unsigned short from the buffer's current position.
     */
    @Override
    public int readOptionFlags() {
        return mFusionDictionaryBuffer.readUnsignedShort();
    }

    /**
     * Reads an int from the buffer's current position.
     */
    @Override
    public int readHeaderSize() {
        return mFusionDictionaryBuffer.readInt();
    }

    /**
     * Reads key/value string pairs until the buffer position reaches the header size.
     *
     * @param headerSize the buffer position at which the header ends.
     * @return the attributes read; the buffer is left positioned at {@code headerSize}.
     */
    @Override
    public HashMap<String, String> readAttributes(final int headerSize) {
        final HashMap<String, String> attributes = new HashMap<String, String>();
        while (mFusionDictionaryBuffer.position() < headerSize) {
            // We can avoid infinite loop here since mFusionDictionaryBuffer.position() is
            // always increased by calling CharEncoding.readString.
            final String key = CharEncoding.readString(mFusionDictionaryBuffer);
            final String value = CharEncoding.readString(mFusionDictionaryBuffer);
            attributes.put(key, value);
        }
        mFusionDictionaryBuffer.position(headerSize);
        return attributes;
    }
}

View file

@ -18,7 +18,7 @@ package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.Constants;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
@ -49,142 +49,146 @@ public final class DynamicBinaryDictIOUtils {
/**
* Delete the word from the binary file.
*
* @param dictReader the dict reader.
* @param dictDecoder the dict decoder.
* @param word the word we delete
* @throws IOException
* @throws UnsupportedFormatException
*/
@UsedForTesting
public static void deleteWord(final BinaryDictReader dictReader, final String word)
public static void deleteWord(final BinaryDictDecoder dictDecoder, final String word)
throws IOException, UnsupportedFormatException {
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
buffer.position(0);
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictReader, word);
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
dictBuffer.position(0);
final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
if (wordPosition == FormatSpec.NOT_VALID_WORD) return;
buffer.position(wordPosition);
final int flags = buffer.readUnsignedByte();
buffer.position(wordPosition);
buffer.put((byte)markAsDeleted(flags));
dictBuffer.position(wordPosition);
final int flags = dictBuffer.readUnsignedByte();
dictBuffer.position(wordPosition);
dictBuffer.put((byte)markAsDeleted(flags));
}
/**
* Update a parent address in a CharGroup that is referred to by groupOriginAddress.
*
* @param buffer the buffer to write.
* @param dictBuffer the DictBuffer to write.
* @param groupOriginAddress the address of the group.
* @param newParentAddress the absolute address of the parent.
* @param formatOptions file format options.
*/
public static void updateParentAddress(final FusionDictionaryBufferInterface buffer,
public static void updateParentAddress(final DictBuffer dictBuffer,
final int groupOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) {
final int originalPosition = buffer.position();
buffer.position(groupOriginAddress);
final int originalPosition = dictBuffer.position();
dictBuffer.position(groupOriginAddress);
if (!formatOptions.mSupportsDynamicUpdate) {
throw new RuntimeException("this file format does not support parent addresses");
}
final int flags = buffer.readUnsignedByte();
final int flags = dictBuffer.readUnsignedByte();
if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) {
// If the group is moved, the parent address is stored in the destination group.
// We are guaranteed to process the destination group later, so there is no need to
// update anything here.
buffer.position(originalPosition);
dictBuffer.position(originalPosition);
return;
}
if (DBG) {
MakedictLog.d("update parent address flags=" + flags + ", " + groupOriginAddress);
}
final int parentOffset = newParentAddress - groupOriginAddress;
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, parentOffset);
buffer.position(originalPosition);
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, parentOffset);
dictBuffer.position(originalPosition);
}
/**
* Update parent addresses in a node array stored at nodeOriginAddress.
*
* @param buffer the buffer to be modified.
* @param dictBuffer the DictBuffer to be modified.
* @param nodeOriginAddress the address of the node array to update.
* @param newParentAddress the address to be written.
* @param formatOptions file format options.
*/
public static void updateParentAddresses(final FusionDictionaryBufferInterface buffer,
public static void updateParentAddresses(final DictBuffer dictBuffer,
final int nodeOriginAddress, final int newParentAddress,
final FormatOptions formatOptions) {
final int originalPosition = buffer.position();
buffer.position(nodeOriginAddress);
final int originalPosition = dictBuffer.position();
dictBuffer.position(nodeOriginAddress);
do {
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
for (int i = 0; i < count; ++i) {
updateParentAddress(buffer, buffer.position(), newParentAddress, formatOptions);
BinaryDictIOUtils.skipCharGroup(buffer, formatOptions);
updateParentAddress(dictBuffer, dictBuffer.position(), newParentAddress,
formatOptions);
BinaryDictIOUtils.skipCharGroup(dictBuffer, formatOptions);
}
final int forwardLinkAddress = buffer.readUnsignedInt24();
buffer.position(forwardLinkAddress);
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
dictBuffer.position(forwardLinkAddress);
} while (formatOptions.mSupportsDynamicUpdate
&& buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
buffer.position(originalPosition);
&& dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
dictBuffer.position(originalPosition);
}
/**
* Update a children address in a CharGroup that is addressed by groupOriginAddress.
*
* @param buffer the buffer to write.
* @param dictBuffer the DictBuffer to write.
* @param groupOriginAddress the address of the group.
* @param newChildrenAddress the absolute address of the child.
* @param formatOptions file format options.
*/
public static void updateChildrenAddress(final FusionDictionaryBufferInterface buffer,
public static void updateChildrenAddress(final DictBuffer dictBuffer,
final int groupOriginAddress, final int newChildrenAddress,
final FormatOptions formatOptions) {
final int originalPosition = buffer.position();
buffer.position(groupOriginAddress);
final int flags = buffer.readUnsignedByte();
final int parentAddress = BinaryDictDecoder.readParentAddress(buffer, formatOptions);
BinaryDictIOUtils.skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte();
final int originalPosition = dictBuffer.position();
dictBuffer.position(groupOriginAddress);
final int flags = dictBuffer.readUnsignedByte();
final int parentAddress = BinaryDictDecoderUtils.readParentAddress(dictBuffer,
formatOptions);
BinaryDictIOUtils.skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) dictBuffer.readUnsignedByte();
final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS
? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - buffer.position();
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, childrenOffset);
buffer.position(originalPosition);
? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - dictBuffer.position();
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, childrenOffset);
dictBuffer.position(originalPosition);
}
/**
* Helper method to move a char group to the tail of the file.
*/
private static int moveCharGroup(final OutputStream destination,
final FusionDictionaryBufferInterface buffer, final CharGroupInfo info,
final DictBuffer dictBuffer, final CharGroupInfo info,
final int nodeArrayOriginAddress, final int oldGroupAddress,
final FormatOptions formatOptions) throws IOException {
updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions);
buffer.position(oldGroupAddress);
final int currentFlags = buffer.readUnsignedByte();
buffer.position(oldGroupAddress);
buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
updateParentAddress(dictBuffer, oldGroupAddress, dictBuffer.limit() + 1, formatOptions);
dictBuffer.position(oldGroupAddress);
final int currentFlags = dictBuffer.readUnsignedByte();
dictBuffer.position(oldGroupAddress);
dictBuffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
& (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
int size = FormatSpec.GROUP_FLAGS_SIZE;
updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions);
updateForwardLink(dictBuffer, nodeArrayOriginAddress, dictBuffer.limit(), formatOptions);
size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info });
return size;
}
@SuppressWarnings("unused")
private static void updateForwardLink(final FusionDictionaryBufferInterface buffer,
private static void updateForwardLink(final DictBuffer dictBuffer,
final int nodeArrayOriginAddress, final int newNodeArrayAddress,
final FormatOptions formatOptions) {
buffer.position(nodeArrayOriginAddress);
dictBuffer.position(nodeArrayOriginAddress);
int jumpCount = 0;
while (jumpCount++ < MAX_JUMPS) {
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
for (int i = 0; i < count; ++i) BinaryDictIOUtils.skipCharGroup(buffer, formatOptions);
final int forwardLinkAddress = buffer.readUnsignedInt24();
final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
for (int i = 0; i < count; ++i) {
BinaryDictIOUtils.skipCharGroup(dictBuffer, formatOptions);
}
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress);
dictBuffer.position(dictBuffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeArrayAddress);
return;
}
buffer.position(forwardLinkAddress);
dictBuffer.position(forwardLinkAddress);
}
if (DBG && jumpCount >= MAX_JUMPS) {
throw new RuntimeException("too many jumps, probably a bug.");
@ -204,7 +208,7 @@ public final class DynamicBinaryDictIOUtils {
* @param shortcutTargets the shortcut targets for this group.
* @param bigrams the bigrams for this group.
* @param destination the stream representing the tail of the file.
* @param buffer the buffer representing the (constant-size) body of the file.
* @param dictBuffer the DictBuffer representing the (constant-size) body of the file.
* @param oldNodeArrayOrigin the origin of the old node array this group was a part of.
* @param oldGroupOrigin the old origin where this group used to be stored.
* @param formatOptions format options for this dictionary.
@ -215,7 +219,7 @@ public final class DynamicBinaryDictIOUtils {
final int length, final int flags, final int frequency, final int parentAddress,
final ArrayList<WeightedString> shortcutTargets,
final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin,
final DictBuffer dictBuffer, final int oldNodeArrayOrigin,
final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException {
int size = 0;
final int newGroupOrigin = fileEndAddress + 1;
@ -228,7 +232,7 @@ public final class DynamicBinaryDictIOUtils {
flags, writtenCharacters, frequency, parentAddress,
fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
bigrams);
moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin,
moveCharGroup(destination, dictBuffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin,
formatOptions);
return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
}
@ -236,7 +240,7 @@ public final class DynamicBinaryDictIOUtils {
/**
* Insert a word into a binary dictionary.
*
* @param dictReader the dict reader.
* @param dictDecoder the dict decoder.
* @param destination a stream to the underlying file, with the pointer at the end of the file.
* @param word the word to insert.
* @param frequency the frequency of the new word.
@ -249,16 +253,17 @@ public final class DynamicBinaryDictIOUtils {
// TODO: Support batch insertion.
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
@UsedForTesting
public static void insertWord(final BinaryDictReader dictReader, final OutputStream destination,
final String word, final int frequency, final ArrayList<WeightedString> bigramStrings,
public static void insertWord(final BinaryDictDecoder dictDecoder,
final OutputStream destination, final String word, final int frequency,
final ArrayList<WeightedString> bigramStrings,
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
final boolean isBlackListEntry)
throws IOException, UnsupportedFormatException {
final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>();
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
if (bigramStrings != null) {
for (final WeightedString bigram : bigramStrings) {
int position = BinaryDictIOUtils.getTerminalPosition(dictReader, bigram.mWord);
int position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, bigram.mWord);
if (position == FormatSpec.NOT_VALID_WORD) {
// TODO: figure out what is the correct thing to do here.
} else {
@ -272,24 +277,24 @@ public final class DynamicBinaryDictIOUtils {
final boolean hasShortcuts = shortcuts != null && !shortcuts.isEmpty();
// find the insert position of the word.
if (buffer.position() != 0) buffer.position(0);
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
if (dictBuffer.position() != 0) dictBuffer.position(0);
final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position();
int wordPos = 0, address = dictBuffer.position(), nodeOriginAddress = dictBuffer.position();
final int[] codePoints = FusionDictionary.getCodePoints(word);
final int wordLen = codePoints.length;
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
if (wordPos >= wordLen) break;
nodeOriginAddress = buffer.position();
nodeOriginAddress = dictBuffer.position();
int nodeParentAddress = -1;
final int charGroupCount = BinaryDictDecoder.readCharGroupCount(buffer);
final int charGroupCount = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
boolean foundNextGroup = false;
for (int i = 0; i < charGroupCount; ++i) {
address = buffer.position();
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer,
buffer.position(), fileHeader.mFormatOptions);
address = dictBuffer.position();
final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(dictBuffer,
dictBuffer.position(), fileHeader.mFormatOptions);
final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags,
fileHeader.mFormatOptions);
if (isMovedGroup) continue;
@ -308,18 +313,18 @@ public final class DynamicBinaryDictIOUtils {
* after
* abc - d - ef
*/
final int newNodeAddress = buffer.limit();
final int newNodeAddress = dictBuffer.limit();
final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1,
isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */,
false /* isBlackListEntry */, fileHeader.mFormatOptions);
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags,
frequency, nodeParentAddress, shortcuts, bigrams, destination,
buffer, nodeOriginAddress, address, fileHeader.mFormatOptions);
dictBuffer, nodeOriginAddress, address, fileHeader.mFormatOptions);
final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p,
currentInfo.mCharacters.length);
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
updateParentAddresses(buffer, currentInfo.mChildrenAddress,
updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress,
newNodeAddress + written + 1, fileHeader.mFormatOptions);
}
final CharGroupInfo newInfo2 = new CharGroupInfo(
@ -344,7 +349,7 @@ public final class DynamicBinaryDictIOUtils {
* - c
*/
final int newNodeAddress = buffer.limit();
final int newNodeAddress = dictBuffer.limit();
final int childrenAddress = currentInfo.mChildrenAddress;
// move prefix
@ -355,13 +360,13 @@ public final class DynamicBinaryDictIOUtils {
fileHeader.mFormatOptions);
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p,
prefixFlags, -1 /* frequency */, nodeParentAddress, null, null,
destination, buffer, nodeOriginAddress, address,
destination, dictBuffer, nodeOriginAddress, address,
fileHeader.mFormatOptions);
final int[] suffixCharacters = Arrays.copyOfRange(
currentInfo.mCharacters, p, currentInfo.mCharacters.length);
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
updateParentAddresses(buffer, currentInfo.mChildrenAddress,
updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress,
newNodeAddress + written + 1, fileHeader.mFormatOptions);
}
final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags(
@ -403,7 +408,7 @@ public final class DynamicBinaryDictIOUtils {
if (wordPos + currentInfo.mCharacters.length == wordLen) {
// the word exists in the dictionary.
// only update group.
final int newNodeAddress = buffer.limit();
final int newNodeAddress = dictBuffer.limit();
final boolean hasMultipleChars = currentInfo.mCharacters.length > 1;
final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars,
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
@ -412,7 +417,7 @@ public final class DynamicBinaryDictIOUtils {
-1 /* endAddress */, flags, currentInfo.mCharacters, frequency,
nodeParentAddress, currentInfo.mChildrenAddress, shortcuts,
bigrams);
moveCharGroup(destination, buffer, newInfo, nodeOriginAddress, address,
moveCharGroup(destination, dictBuffer, newInfo, nodeOriginAddress, address,
fileHeader.mFormatOptions);
return;
}
@ -430,8 +435,8 @@ public final class DynamicBinaryDictIOUtils {
* after
* ab - cd - e
*/
final int newNodeAddress = buffer.limit();
updateChildrenAddress(buffer, address, newNodeAddress,
final int newNodeAddress = dictBuffer.limit();
updateChildrenAddress(dictBuffer, address, newNodeAddress,
fileHeader.mFormatOptions);
final int newGroupAddress = newNodeAddress + 1;
final boolean hasMultipleChars = (wordLen - wordPos) > 1;
@ -445,7 +450,7 @@ public final class DynamicBinaryDictIOUtils {
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo });
return;
}
buffer.position(currentInfo.mChildrenAddress);
dictBuffer.position(currentInfo.mChildrenAddress);
foundNextGroup = true;
break;
}
@ -454,8 +459,8 @@ public final class DynamicBinaryDictIOUtils {
if (foundNextGroup) continue;
// reached the end of the array.
final int linkAddressPosition = buffer.position();
int nextLink = buffer.readUnsignedInt24();
final int linkAddressPosition = dictBuffer.position();
int nextLink = dictBuffer.readUnsignedInt24();
if ((nextLink & FormatSpec.MSB24) != 0) {
nextLink = -(nextLink & FormatSpec.SINT24_MAX);
}
@ -475,9 +480,9 @@ public final class DynamicBinaryDictIOUtils {
*/
// change the forward link address.
final int newNodeAddress = buffer.limit();
buffer.position(linkAddressPosition);
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress);
final int newNodeAddress = dictBuffer.limit();
dictBuffer.position(linkAddressPosition);
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeAddress);
final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
final int flags = BinaryDictEncoder.makeCharGroupFlags(characters.length > 1,
@ -490,7 +495,7 @@ public final class DynamicBinaryDictIOUtils {
return;
} else {
depth--;
buffer.position(nextLink);
dictBuffer.position(nextLink);
}
}
}

View file

@ -24,7 +24,7 @@ import java.util.HashMap;
/**
* An interface to read a binary dictionary file header.
*/
public interface HeaderReaderInterface {
public interface HeaderReader {
public int readVersion() throws IOException, UnsupportedFormatException;
public int readOptionFlags();
public int readHeaderSize();

View file

@ -28,7 +28,7 @@ import com.android.inputmethod.latin.ExpandableDictionary;
import com.android.inputmethod.latin.LatinImeLogger;
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
import com.android.inputmethod.latin.WordComposer;
import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.settings.Settings;
import com.android.inputmethod.latin.utils.CollectionUtils;
@ -241,10 +241,10 @@ public abstract class DynamicPredictionDictionaryBase extends ExpandableDictiona
};
// Load the dictionary from binary file
final BinaryDictReader reader = new BinaryDictReader(
final BinaryDictDecoder reader = new BinaryDictDecoder(
new File(getContext().getFilesDir(), fileName));
try {
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
reader.openDictBuffer(new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
UserHistoryDictIOUtils.readDictionaryBinary(reader, listener);
} catch (FileNotFoundException e) {
// This is an expected condition: we don't have a user history dictionary for this

View file

@ -16,17 +16,17 @@
package com.android.inputmethod.latin.utils;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
/**
* This class provides an implementation for the FusionDictionary buffer interface that is backed
* by a simple byte array. It allows creating a binary dictionary in memory.
*/
public final class ByteArrayWrapper implements FusionDictionaryBufferInterface {
public final class ByteArrayDictBuffer implements DictBuffer {
private byte[] mBuffer;
private int mPosition;
public ByteArrayWrapper(final byte[] buffer) {
public ByteArrayDictBuffer(final byte[] buffer) {
mBuffer = buffer;
mPosition = 0;
}

View file

@ -22,7 +22,6 @@ import com.android.inputmethod.annotations.UsedForTesting;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
@ -119,13 +118,13 @@ public final class UserHistoryDictIOUtils {
/**
* Reads dictionary from file.
*/
public static void readDictionaryBinary(final BinaryDictReader reader,
public static void readDictionaryBinary(final BinaryDictDecoder dictDecoder,
final OnAddWordListener dict) {
final Map<Integer, String> unigrams = CollectionUtils.newTreeMap();
final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
try {
BinaryDictIOUtils.readUnigramsAndBigramsBinary(reader, unigrams, frequencies,
BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, unigrams, frequencies,
bigrams);
} catch (IOException e) {
Log.e(TAG, "IO exception while reading file", e);
@ -157,7 +156,7 @@ public final class UserHistoryDictIOUtils {
continue;
}
to.setBigram(word1, word2,
BinaryDictDecoder.reconstructBigramFrequency(unigramFrequency,
BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency,
attr.mFrequency));
}
}

View file

@ -22,7 +22,7 @@ import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log;
import android.util.SparseArray;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
@ -44,7 +44,7 @@ import java.util.Random;
import java.util.Set;
/**
* Unit tests for BinaryDictDecoder and BinaryDictEncoder.
* Unit tests for BinaryDictDecoderUtils and BinaryDictEncoder.
*/
@LargeTest
public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
@ -118,14 +118,16 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
// Utilities for test
/**
* Makes new buffer according to BUFFER_TYPE.
* Makes new DictBuffer according to BUFFER_TYPE.
*/
private void getBuffer(final BinaryDictReader reader, final int bufferType)
private void getDictBuffer(final BinaryDictDecoder dictDecoder, final int bufferType)
throws FileNotFoundException, IOException {
if (bufferType == USE_BYTE_BUFFER) {
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
} else if (bufferType == USE_BYTE_ARRAY) {
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
}
}
@ -269,14 +271,14 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap,
final int bufferType) {
long now, diff = -1;
final BinaryDictReader reader = new BinaryDictReader(file);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
FusionDictionary dict = null;
try {
getBuffer(reader, bufferType);
assertNotNull(reader.getBuffer());
getDictBuffer(dictDecoder, bufferType);
assertNotNull(dictDecoder.getDictBuffer());
now = System.currentTimeMillis();
dict = BinaryDictDecoder.readDictionaryBinary(reader, null);
dict = BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null);
diff = System.currentTimeMillis() - now;
} catch (IOException e) {
Log.e(TAG, "IOException while reading dictionary", e);
@ -388,7 +390,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
actBigrams.get(word1).add(word2);
final int bigramFreq = BinaryDictDecoder.reconstructBigramFrequency(
final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
unigramFreq, attr.mFrequency);
assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
}
@ -407,12 +409,12 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap();
long now = -1, diff = -1;
final BinaryDictReader reader = new BinaryDictReader(file);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
try {
getBuffer(reader, bufferType);
assertNotNull("Can't get buffer.", reader.getBuffer());
getDictBuffer(dictDecoder, bufferType);
assertNotNull("Can't get buffer.", dictDecoder.getDictBuffer());
now = System.currentTimeMillis();
BinaryDictIOUtils.readUnigramsAndBigramsBinary(reader, resultWords, resultFreqs,
BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, resultWords, resultFreqs,
resultBigrams);
diff = System.currentTimeMillis() - now;
} catch (IOException e) {
@ -497,31 +499,31 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
// Tests for getTerminalPosition
private String getWordFromBinary(final BinaryDictReader dictReader, final int address) {
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
if (buffer.position() != 0) buffer.position(0);
private String getWordFromBinary(final BinaryDictDecoder dictDecoder, final int address) {
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
if (dictBuffer.position() != 0) dictBuffer.position(0);
FileHeader fileHeader = null;
try {
fileHeader = BinaryDictDecoder.readHeader(dictReader);
fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
} catch (IOException e) {
return null;
} catch (UnsupportedFormatException e) {
return null;
}
if (fileHeader == null) return null;
return BinaryDictDecoder.getWordAtAddress(buffer, fileHeader.mHeaderSize,
return BinaryDictDecoderUtils.getWordAtAddress(dictBuffer, fileHeader.mHeaderSize,
address - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord;
}
private long runGetTerminalPosition(final BinaryDictReader reader, final String word, int index,
boolean contained) {
private long runGetTerminalPosition(final BinaryDictDecoder dictDecoder, final String word,
int index, boolean contained) {
final int expectedFrequency = (UNIGRAM_FREQ + index) % 255;
long diff = -1;
int position = -1;
try {
final long now = System.nanoTime();
position = BinaryDictIOUtils.getTerminalPosition(reader, word);
position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
diff = System.nanoTime() - now;
} catch (IOException e) {
Log.e(TAG, "IOException while getTerminalPosition", e);
@ -530,7 +532,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
}
assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
if (contained) assertEquals(getWordFromBinary(reader, position), word);
if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word);
return diff;
}
@ -550,28 +552,29 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
final BinaryDictReader reader = new BinaryDictReader(file);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
try {
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
} catch (IOException e) {
// ignore
Log.e(TAG, "IOException while opening the buffer", e);
}
assertNotNull("Can't get the buffer", reader.getBuffer());
assertNotNull("Can't get the buffer", dictDecoder.getDictBuffer());
try {
// too long word
final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, longWord));
BinaryDictIOUtils.getTerminalPosition(dictDecoder, longWord));
// null
assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, null));
BinaryDictIOUtils.getTerminalPosition(dictDecoder, null));
// empty string
assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, ""));
BinaryDictIOUtils.getTerminalPosition(dictDecoder, ""));
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
}
@ -579,7 +582,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
// Test a word that is contained within the dictionary.
long sum = 0;
for (int i = 0; i < sWords.size(); ++i) {
final long time = runGetTerminalPosition(reader, sWords.get(i), i, true);
final long time = runGetTerminalPosition(dictDecoder, sWords.get(i), i, true);
sum += time == -1 ? 0 : time;
}
Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000));
@ -590,7 +593,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
for (int i = 0; i < 1000; ++i) {
final String word = generateWord(random, codePointSet);
if (sWords.indexOf(word) != -1) continue;
runGetTerminalPosition(reader, word, i, false);
runGetTerminalPosition(dictDecoder, word, i, false);
}
}
@ -610,28 +613,28 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
final BinaryDictReader reader = new BinaryDictReader(file);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
try {
reader.openBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
} catch (IOException e) {
// ignore
Log.e(TAG, "IOException while opening the buffer", e);
}
assertNotNull("Can't get the buffer", reader.getBuffer());
assertNotNull("Can't get the buffer", dictDecoder.getDictBuffer());
try {
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0)));
DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(0));
BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(0)));
DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(0));
assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0)));
BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(0)));
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5)));
DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(5));
BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(5)));
DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(5));
assertEquals(FormatSpec.NOT_VALID_WORD,
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5)));
BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(5)));
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
}

View file

@ -16,14 +16,14 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.BinaryDictReader.FusionDictionaryBufferFactory;
import com.android.inputmethod.latin.makedict.BinaryDictReader.
FusionDictionaryBufferFromByteArrayFactory;
import com.android.inputmethod.latin.makedict.BinaryDictReader.
FusionDictionaryBufferFromByteBufferFactory;
import com.android.inputmethod.latin.makedict.BinaryDictReader.
FusionDictionaryBufferFromWritableByteBufferFactory;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.DictionaryBufferFactory;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
DictionaryBufferFromByteArrayFactory;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
DictionaryBufferFromReadOnlyByteBufferFactory;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
DictionaryBufferFromWritableByteBufferFactory;
import android.test.AndroidTestCase;
import android.util.Log;
@ -33,10 +33,10 @@ import java.io.FileOutputStream;
import java.io.IOException;
/**
* Unit tests for BinaryDictReader
* Unit tests for BinaryDictDecoder
*/
public class BinaryDictReaderTests extends AndroidTestCase {
private static final String TAG = BinaryDictReaderTests.class.getSimpleName();
public class BinaryDictDecoderTests extends AndroidTestCase {
private static final String TAG = BinaryDictDecoderTests.class.getSimpleName();
private final byte[] data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
@ -61,7 +61,7 @@ public class BinaryDictReaderTests extends AndroidTestCase {
@SuppressWarnings("null")
public void runTestOpenBuffer(final String testName,
final FusionDictionaryBufferFactory factory) {
final DictionaryBufferFactory factory) {
File testFile = null;
try {
testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
@ -70,9 +70,9 @@ public class BinaryDictReaderTests extends AndroidTestCase {
}
assertNotNull(testFile);
final BinaryDictReader reader = new BinaryDictReader(testFile);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(testFile);
try {
reader.openBuffer(factory);
dictDecoder.openDictBuffer(factory);
} catch (Exception e) {
Log.e(TAG, "Failed to open the buffer", e);
}
@ -80,32 +80,32 @@ public class BinaryDictReaderTests extends AndroidTestCase {
writeDataToFile(testFile);
try {
reader.openBuffer(factory);
dictDecoder.openDictBuffer(factory);
} catch (Exception e) {
Log.e(TAG, "Raised the exception while opening buffer", e);
}
assertEquals(testFile.length(), reader.getBuffer().capacity());
assertEquals(testFile.length(), dictDecoder.getDictBuffer().capacity());
}
public void testOpenBufferWithByteBuffer() {
runTestOpenBuffer("testOpenBufferWithByteBuffer",
new FusionDictionaryBufferFromByteBufferFactory());
new DictionaryBufferFromReadOnlyByteBufferFactory());
}
public void testOpenBufferWithByteArray() {
runTestOpenBuffer("testOpenBufferWithByteArray",
new FusionDictionaryBufferFromByteArrayFactory());
new DictionaryBufferFromByteArrayFactory());
}
public void testOpenBufferWithWritableByteBuffer() {
runTestOpenBuffer("testOpenBufferWithWritableByteBuffer",
new FusionDictionaryBufferFromWritableByteBufferFactory());
new DictionaryBufferFromWritableByteBufferFactory());
}
@SuppressWarnings("null")
public void runTestGetBuffer(final String testName,
final FusionDictionaryBufferFactory factory) {
final DictionaryBufferFactory factory) {
File testFile = null;
try {
testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
@ -113,40 +113,41 @@ public class BinaryDictReaderTests extends AndroidTestCase {
Log.e(TAG, "IOException while the creating temporary file", e);
}
final BinaryDictReader reader = new BinaryDictReader(testFile);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(testFile);
// the default return value of getDictBuffer() must be null.
assertNull("the default return value of getBuffer() is not null", reader.getBuffer());
assertNull("the default return value of getBuffer() is not null",
dictDecoder.getDictBuffer());
writeDataToFile(testFile);
assertTrue(testFile.exists());
Log.d(TAG, "file length = " + testFile.length());
FusionDictionaryBufferInterface buffer = null;
DictBuffer dictBuffer = null;
try {
buffer = reader.openAndGetBuffer(factory);
dictBuffer = dictDecoder.openAndGetDictBuffer(factory);
} catch (IOException e) {
Log.e(TAG, "Failed to open and get the buffer", e);
}
assertNotNull("the buffer must not be null", buffer);
assertNotNull("the buffer must not be null", dictBuffer);
for (int i = 0; i < data.length; ++i) {
assertEquals(data[i], buffer.readUnsignedByte());
assertEquals(data[i], dictBuffer.readUnsignedByte());
}
}
public void testGetBufferWithByteBuffer() {
runTestGetBuffer("testGetBufferWithByteBuffer",
new FusionDictionaryBufferFromByteBufferFactory());
new DictionaryBufferFromReadOnlyByteBufferFactory());
}
public void testGetBufferWithByteArray() {
runTestGetBuffer("testGetBufferWithByteArray",
new FusionDictionaryBufferFromByteArrayFactory());
new DictionaryBufferFromByteArrayFactory());
}
public void testGetBufferWithWritableByteBuffer() {
runTestGetBuffer("testGetBufferWithWritableByteBuffer",
new FusionDictionaryBufferFromWritableByteBufferFactory());
new DictionaryBufferFromWritableByteBufferFactory());
}
}

View file

@ -21,9 +21,9 @@ import android.test.MoreAsserts;
import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
import com.android.inputmethod.latin.makedict.BinaryDictReader.
FusionDictionaryBufferFromWritableByteBufferFactory;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
DictionaryBufferFromWritableByteBufferFactory;
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
@ -112,26 +112,26 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
Log.d(TAG, " end address = " + info.mEndAddress);
}
private static void printNode(final FusionDictionaryBufferInterface buffer,
private static void printNode(final DictBuffer dictBuffer,
final FormatSpec.FormatOptions formatOptions) {
Log.d(TAG, "Node at " + buffer.position());
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
Log.d(TAG, "Node at " + dictBuffer.position());
final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
Log.d(TAG, " charGroupCount = " + count);
for (int i = 0; i < count; ++i) {
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer,
buffer.position(), formatOptions);
final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(dictBuffer,
dictBuffer.position(), formatOptions);
printCharGroup(currentInfo);
}
if (formatOptions.mSupportsDynamicUpdate) {
final int forwardLinkAddress = buffer.readUnsignedInt24();
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
}
}
private static void printBinaryFile(final BinaryDictReader dictReader)
private static void printBinaryFile(final BinaryDictDecoder dictDecoder)
throws IOException, UnsupportedFormatException {
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
final DictBuffer buffer = dictDecoder.getDictBuffer();
while (buffer.position() < buffer.limit()) {
printNode(buffer, fileHeader.mFormatOptions);
}
@ -139,13 +139,13 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
private int getWordPosition(final File file, final String word) {
int position = FormatSpec.NOT_VALID_WORD;
final BinaryDictReader dictReader = new BinaryDictReader(file);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
FileInputStream inStream = null;
try {
inStream = new FileInputStream(file);
dictReader.openBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
position = BinaryDictIOUtils.getTerminalPosition(dictReader, word);
dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
} finally {
@ -161,12 +161,12 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
}
private CharGroupInfo findWordFromFile(final File file, final String word) {
final BinaryDictReader dictReader = new BinaryDictReader(file);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
CharGroupInfo info = null;
try {
dictReader.openBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
info = BinaryDictIOUtils.findWordByBinaryDictReader(dictReader, word);
dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
info = BinaryDictIOUtils.findWordByBinaryDictReader(dictDecoder, word);
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
}
@ -177,18 +177,18 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
private long insertAndCheckWord(final File file, final String word, final int frequency,
final boolean exist, final ArrayList<WeightedString> bigrams,
final ArrayList<WeightedString> shortcuts) {
final BinaryDictReader dictReader = new BinaryDictReader(file);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
BufferedOutputStream outStream = null;
long amountOfTime = -1;
try {
dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
dictDecoder.openDictBuffer(new DictionaryBufferFromWritableByteBufferFactory());
outStream = new BufferedOutputStream(new FileOutputStream(file, true));
if (!exist) {
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
}
final long now = System.nanoTime();
DynamicBinaryDictIOUtils.insertWord(dictReader, outStream, word, frequency, bigrams,
DynamicBinaryDictIOUtils.insertWord(dictDecoder, outStream, word, frequency, bigrams,
shortcuts, false, false);
amountOfTime = System.nanoTime() - now;
outStream.flush();
@ -211,23 +211,23 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
}
private void deleteWord(final File file, final String word) {
final BinaryDictReader dictReader = new BinaryDictReader(file);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
try {
dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
DynamicBinaryDictIOUtils.deleteWord(dictReader, word);
dictDecoder.openDictBuffer(new DictionaryBufferFromWritableByteBufferFactory());
DynamicBinaryDictIOUtils.deleteWord(dictDecoder, word);
} catch (IOException e) {
} catch (UnsupportedFormatException e) {
}
}
private void checkReverseLookup(final File file, final String word, final int position) {
final BinaryDictReader dictReader = new BinaryDictReader(file);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
try {
final FusionDictionaryBufferInterface buffer = dictReader.openAndGetBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
final DictBuffer dictBuffer = dictDecoder.openAndGetDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
assertEquals(word,
BinaryDictDecoder.getWordAtAddress(dictReader.getBuffer(),
BinaryDictDecoderUtils.getWordAtAddress(dictDecoder.getDictBuffer(),
fileHeader.mHeaderSize, position - fileHeader.mHeaderSize,
fileHeader.mFormatOptions).mWord);
} catch (IOException e) {

View file

@ -21,7 +21,7 @@ import android.test.AndroidTestCase;
import android.test.suitebuilder.annotation.LargeTest;
import android.util.Log;
import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
@ -147,15 +147,16 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
}
private void readDictFromFile(final File file, final OnAddWordListener listener) {
final BinaryDictReader reader = new BinaryDictReader(file);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
try {
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
} catch (FileNotFoundException e) {
Log.e(TAG, "file not found", e);
} catch (IOException e) {
Log.e(TAG, "IOException", e);
}
UserHistoryDictIOUtils.readDictionaryBinary(reader, listener);
UserHistoryDictIOUtils.readDictionaryBinary(dictDecoder, listener);
}
public void testGenerateFusionDictionary() {

View file

@ -28,7 +28,7 @@ LATINIME_ANNOTATIONS_SOURCE_DIRECTORY := $(LATINIME_BASE_SOURCE_DIRECTORY)/annot
LATINIME_CORE_SOURCE_DIRECTORY := $(LATINIME_BASE_SOURCE_DIRECTORY)/latin
MAKEDICT_CORE_SOURCE_DIRECTORY := $(LATINIME_CORE_SOURCE_DIRECTORY)/makedict
USED_TARGETTED_UTILS := \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayWrapper.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java

View file

@ -16,8 +16,8 @@
package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
@ -97,7 +97,7 @@ public final class BinaryDictOffdeviceUtils {
// over and over, ending in a stack overflow. Hence we limit the depth at which we try
// decoding the file.
if (depth > MAX_DECODE_DEPTH) return null;
if (BinaryDictDecoder.isBinaryDictionary(src)) {
if (BinaryDictDecoderUtils.isBinaryDictionary(src)) {
spec.mFile = src;
return spec;
}
@ -184,15 +184,15 @@ public final class BinaryDictOffdeviceUtils {
crash(filename, new RuntimeException(
filename + " does not seem to be a dictionary file"));
} else {
final BinaryDictReader reader = new BinaryDictReader(decodedSpec.mFile);
reader.openBuffer(
new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(decodedSpec.mFile);
dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
if (report) {
System.out.println("Format : Binary dictionary format");
System.out.println("Packaging : " + decodedSpec.describeChain());
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
}
return BinaryDictDecoder.readDictionaryBinary(reader, null);
return BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null);
}
}
} catch (IOException e) {

View file

@ -16,9 +16,9 @@
package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.FormatSpec;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.MakedictLog;
@ -176,7 +176,7 @@ public class DictionaryMaker {
inputUnigramXml = filename;
} else if (CombinedInputOutput.isCombinedDictionary(filename)) {
inputCombined = filename;
} else if (BinaryDictDecoder.isBinaryDictionary(filename)) {
} else if (BinaryDictDecoderUtils.isBinaryDictionary(filename)) {
inputBinary = filename;
} else {
throw new IllegalArgumentException(
@ -198,7 +198,7 @@ public class DictionaryMaker {
}
} else {
if (null == inputBinary && null == inputUnigramXml) {
if (BinaryDictDecoder.isBinaryDictionary(arg)) {
if (BinaryDictDecoderUtils.isBinaryDictionary(arg)) {
inputBinary = arg;
} else if (CombinedInputOutput.isCombinedDictionary(arg)) {
inputCombined = arg;
@ -266,9 +266,10 @@ public class DictionaryMaker {
private static FusionDictionary readBinaryFile(final String binaryFilename)
throws FileNotFoundException, IOException, UnsupportedFormatException {
final File file = new File(binaryFilename);
final BinaryDictReader reader = new BinaryDictReader(file);
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
return BinaryDictDecoder.readDictionaryBinary(reader, null);
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
return BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null);
}
/**

View file

@ -17,8 +17,8 @@
package com.android.inputmethod.latin.dicttool;
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
import com.android.inputmethod.latin.makedict.BinaryDictReader;
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
import com.android.inputmethod.latin.makedict.FusionDictionary;
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
@ -67,9 +67,10 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step);
}
assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size());
final BinaryDictReader reader = new BinaryDictReader(decodeSpec.mFile);
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
final FusionDictionary resultDict = BinaryDictDecoder.readDictionaryBinary(reader,
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(decodeSpec.mFile);
dictDecoder.openDictBuffer(
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
final FusionDictionary resultDict = BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder,
null /* dict : an optional dictionary to add words to, or null */);
assertEquals("Dictionary can't be read back correctly",
FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(),