[Refactor] Rename BinaryDictReader and BinaryDictDecoder.
BinaryDictReader -> BinaryDictDecoder. BinaryDictDecoder -> BinaryDictDecoderUtils. Change-Id: Iadf2153b379b760538ecda488dda4f17225e5f37
main
parent
63155dfa77
commit
77bce05e6f
|
@ -21,7 +21,7 @@ import android.content.SharedPreferences;
|
||||||
import android.content.res.AssetFileDescriptor;
|
import android.content.res.AssetFileDescriptor;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||||
import com.android.inputmethod.latin.utils.DictionaryInfoUtils;
|
import com.android.inputmethod.latin.utils.DictionaryInfoUtils;
|
||||||
|
@ -231,17 +231,17 @@ final public class BinaryDictionaryGetter {
|
||||||
try {
|
try {
|
||||||
// Read the version of the file
|
// Read the version of the file
|
||||||
inStream = new FileInputStream(f);
|
inStream = new FileInputStream(f);
|
||||||
final BinaryDictDecoder.ByteBufferWrapper buffer =
|
final BinaryDictDecoderUtils.ByteBufferDictBuffer dictBuffer =
|
||||||
new BinaryDictDecoder.ByteBufferWrapper(inStream.getChannel().map(
|
new BinaryDictDecoderUtils.ByteBufferDictBuffer(inStream.getChannel().map(
|
||||||
FileChannel.MapMode.READ_ONLY, 0, f.length()));
|
FileChannel.MapMode.READ_ONLY, 0, f.length()));
|
||||||
final int magic = buffer.readInt();
|
final int magic = dictBuffer.readInt();
|
||||||
if (magic != FormatSpec.MAGIC_NUMBER) {
|
if (magic != FormatSpec.MAGIC_NUMBER) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
final int formatVersion = buffer.readInt();
|
final int formatVersion = dictBuffer.readInt();
|
||||||
final int headerSize = buffer.readInt();
|
final int headerSize = dictBuffer.readInt();
|
||||||
final HashMap<String, String> options = CollectionUtils.newHashMap();
|
final HashMap<String, String> options = CollectionUtils.newHashMap();
|
||||||
BinaryDictDecoder.populateOptions(buffer, headerSize, options);
|
BinaryDictDecoderUtils.populateOptions(dictBuffer, headerSize, options);
|
||||||
|
|
||||||
final String version = options.get(VERSION_KEY);
|
final String version = options.get(VERSION_KEY);
|
||||||
if (null == version) {
|
if (null == version) {
|
||||||
|
|
|
@ -17,35 +17,23 @@
|
||||||
package com.android.inputmethod.latin.makedict;
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
import com.android.inputmethod.annotations.UsedForTesting;
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
import com.android.inputmethod.latin.makedict.decoder.HeaderReader;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
|
||||||
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
|
|
||||||
import com.android.inputmethod.latin.utils.JniUtils;
|
import com.android.inputmethod.latin.utils.JniUtils;
|
||||||
|
|
||||||
import java.io.ByteArrayOutputStream;
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
import java.io.FileNotFoundException;
|
import java.io.FileNotFoundException;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.RandomAccessFile;
|
||||||
import java.nio.ByteBuffer;
|
import java.nio.ByteBuffer;
|
||||||
import java.nio.channels.FileChannel;
|
import java.nio.channels.FileChannel;
|
||||||
import java.util.ArrayList;
|
|
||||||
import java.util.Arrays;
|
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.Map;
|
|
||||||
import java.util.TreeMap;
|
|
||||||
|
|
||||||
/**
|
@UsedForTesting
|
||||||
* Decodes binary files for a FusionDictionary.
|
public class BinaryDictDecoder implements HeaderReader {
|
||||||
*
|
|
||||||
* All the methods in this class are static.
|
|
||||||
*/
|
|
||||||
public final class BinaryDictDecoder {
|
|
||||||
|
|
||||||
private static final boolean DBG = MakedictLog.DBG;
|
|
||||||
|
|
||||||
static {
|
static {
|
||||||
JniUtils.loadNativeLibrary();
|
JniUtils.loadNativeLibrary();
|
||||||
|
@ -54,742 +42,148 @@ public final class BinaryDictDecoder {
|
||||||
// TODO: implement something sensical instead of just a phony method
|
// TODO: implement something sensical instead of just a phony method
|
||||||
private static native int doNothing();
|
private static native int doNothing();
|
||||||
|
|
||||||
private BinaryDictDecoder() {
|
public interface DictionaryBufferFactory {
|
||||||
// This utility class is not publicly instantiable.
|
public DictBuffer getDictionaryBuffer(final File file)
|
||||||
}
|
throws FileNotFoundException, IOException;
|
||||||
|
|
||||||
private static final int MAX_JUMPS = 12;
|
|
||||||
|
|
||||||
@UsedForTesting
|
|
||||||
public interface FusionDictionaryBufferInterface {
|
|
||||||
public int readUnsignedByte();
|
|
||||||
public int readUnsignedShort();
|
|
||||||
public int readUnsignedInt24();
|
|
||||||
public int readInt();
|
|
||||||
public int position();
|
|
||||||
public void position(int newPosition);
|
|
||||||
public void put(final byte b);
|
|
||||||
public int limit();
|
|
||||||
@UsedForTesting
|
|
||||||
public int capacity();
|
|
||||||
}
|
|
||||||
|
|
||||||
public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface {
|
|
||||||
private ByteBuffer mBuffer;
|
|
||||||
|
|
||||||
public ByteBufferWrapper(final ByteBuffer buffer) {
|
|
||||||
mBuffer = buffer;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates DictionaryBuffer using a ByteBuffer
|
||||||
|
*
|
||||||
|
* This class uses less memory than DictionaryBufferFromByteArrayFactory,
|
||||||
|
* but doesn't perform as fast.
|
||||||
|
* When operating on a big dictionary, this class is preferred.
|
||||||
|
*/
|
||||||
|
public static final class DictionaryBufferFromReadOnlyByteBufferFactory
|
||||||
|
implements DictionaryBufferFactory {
|
||||||
@Override
|
@Override
|
||||||
public int readUnsignedByte() {
|
public DictBuffer getDictionaryBuffer(final File file)
|
||||||
return mBuffer.get() & 0xFF;
|
throws FileNotFoundException, IOException {
|
||||||
|
FileInputStream inStream = null;
|
||||||
|
ByteBuffer buffer = null;
|
||||||
|
try {
|
||||||
|
inStream = new FileInputStream(file);
|
||||||
|
buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY,
|
||||||
|
0, file.length());
|
||||||
|
} finally {
|
||||||
|
if (inStream != null) {
|
||||||
|
inStream.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (buffer != null) {
|
||||||
|
return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Creates DictionaryBuffer using a byte array
|
||||||
|
*
|
||||||
|
* This class performs faster than other classes, but consumes more memory.
|
||||||
|
* When operating on a small dictionary, this class is preferred.
|
||||||
|
*/
|
||||||
|
public static final class DictionaryBufferFromByteArrayFactory
|
||||||
|
implements DictionaryBufferFactory {
|
||||||
@Override
|
@Override
|
||||||
public int readUnsignedShort() {
|
public DictBuffer getDictionaryBuffer(final File file)
|
||||||
return mBuffer.getShort() & 0xFFFF;
|
throws FileNotFoundException, IOException {
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int readUnsignedInt24() {
|
|
||||||
final int retval = readUnsignedByte();
|
|
||||||
return (retval << 16) + readUnsignedShort();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int readInt() {
|
|
||||||
return mBuffer.getInt();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int position() {
|
|
||||||
return mBuffer.position();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void position(int newPos) {
|
|
||||||
mBuffer.position(newPos);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void put(final byte b) {
|
|
||||||
mBuffer.put(b);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int limit() {
|
|
||||||
return mBuffer.limit();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int capacity() {
|
|
||||||
return mBuffer.capacity();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* A class grouping utility function for our specific character encoding.
|
|
||||||
*/
|
|
||||||
static final class CharEncoding {
|
|
||||||
private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
|
|
||||||
private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF;
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Helper method to find out whether this code fits on one byte
|
|
||||||
*/
|
|
||||||
private static boolean fitsOnOneByte(final int character) {
|
|
||||||
return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE
|
|
||||||
&& character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute the size of a character given its character code.
|
|
||||||
*
|
|
||||||
* Char format is:
|
|
||||||
* 1 byte = bbbbbbbb match
|
|
||||||
* case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
|
|
||||||
* else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
|
|
||||||
* unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
|
|
||||||
* 00011111 would be outside unicode.
|
|
||||||
* else: iso-latin-1 code
|
|
||||||
* This allows for the whole unicode range to be encoded, including chars outside of
|
|
||||||
* the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
|
|
||||||
* characters which should never happen anyway (and still work, but take 3 bytes).
|
|
||||||
*
|
|
||||||
* @param character the character code.
|
|
||||||
* @return the size in binary encoded-form, either 1 or 3 bytes.
|
|
||||||
*/
|
|
||||||
static int getCharSize(final int character) {
|
|
||||||
// See char encoding in FusionDictionary.java
|
|
||||||
if (fitsOnOneByte(character)) return 1;
|
|
||||||
if (FormatSpec.INVALID_CHARACTER == character) return 1;
|
|
||||||
return 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Compute the byte size of a character array.
|
|
||||||
*/
|
|
||||||
static int getCharArraySize(final int[] chars) {
|
|
||||||
int size = 0;
|
|
||||||
for (int character : chars) size += getCharSize(character);
|
|
||||||
return size;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Writes a char array to a byte buffer.
|
|
||||||
*
|
|
||||||
* @param codePoints the code point array to write.
|
|
||||||
* @param buffer the byte buffer to write to.
|
|
||||||
* @param index the index in buffer to write the character array to.
|
|
||||||
* @return the index after the last character.
|
|
||||||
*/
|
|
||||||
static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) {
|
|
||||||
for (int codePoint : codePoints) {
|
|
||||||
if (1 == getCharSize(codePoint)) {
|
|
||||||
buffer[index++] = (byte)codePoint;
|
|
||||||
} else {
|
|
||||||
buffer[index++] = (byte)(0xFF & (codePoint >> 16));
|
|
||||||
buffer[index++] = (byte)(0xFF & (codePoint >> 8));
|
|
||||||
buffer[index++] = (byte)(0xFF & codePoint);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return index;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Writes a string with our character format to a byte buffer.
|
|
||||||
*
|
|
||||||
* This will also write the terminator byte.
|
|
||||||
*
|
|
||||||
* @param buffer the byte buffer to write to.
|
|
||||||
* @param origin the offset to write from.
|
|
||||||
* @param word the string to write.
|
|
||||||
* @return the size written, in bytes.
|
|
||||||
*/
|
|
||||||
static int writeString(final byte[] buffer, final int origin,
|
|
||||||
final String word) {
|
|
||||||
final int length = word.length();
|
|
||||||
int index = origin;
|
|
||||||
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
|
|
||||||
final int codePoint = word.codePointAt(i);
|
|
||||||
if (1 == getCharSize(codePoint)) {
|
|
||||||
buffer[index++] = (byte)codePoint;
|
|
||||||
} else {
|
|
||||||
buffer[index++] = (byte)(0xFF & (codePoint >> 16));
|
|
||||||
buffer[index++] = (byte)(0xFF & (codePoint >> 8));
|
|
||||||
buffer[index++] = (byte)(0xFF & codePoint);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
buffer[index++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR;
|
|
||||||
return index - origin;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Writes a string with our character format to a ByteArrayOutputStream.
|
|
||||||
*
|
|
||||||
* This will also write the terminator byte.
|
|
||||||
*
|
|
||||||
* @param buffer the ByteArrayOutputStream to write to.
|
|
||||||
* @param word the string to write.
|
|
||||||
*/
|
|
||||||
static void writeString(final ByteArrayOutputStream buffer, final String word) {
|
|
||||||
final int length = word.length();
|
|
||||||
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
|
|
||||||
final int codePoint = word.codePointAt(i);
|
|
||||||
if (1 == getCharSize(codePoint)) {
|
|
||||||
buffer.write((byte) codePoint);
|
|
||||||
} else {
|
|
||||||
buffer.write((byte) (0xFF & (codePoint >> 16)));
|
|
||||||
buffer.write((byte) (0xFF & (codePoint >> 8)));
|
|
||||||
buffer.write((byte) (0xFF & codePoint));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads a string from a buffer. This is the converse of the above method.
|
|
||||||
*/
|
|
||||||
static String readString(final FusionDictionaryBufferInterface buffer) {
|
|
||||||
final StringBuilder s = new StringBuilder();
|
|
||||||
int character = readChar(buffer);
|
|
||||||
while (character != FormatSpec.INVALID_CHARACTER) {
|
|
||||||
s.appendCodePoint(character);
|
|
||||||
character = readChar(buffer);
|
|
||||||
}
|
|
||||||
return s.toString();
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads a character from the buffer.
|
|
||||||
*
|
|
||||||
* This follows the character format documented earlier in this source file.
|
|
||||||
*
|
|
||||||
* @param buffer the buffer, positioned over an encoded character.
|
|
||||||
* @return the character code.
|
|
||||||
*/
|
|
||||||
static int readChar(final FusionDictionaryBufferInterface buffer) {
|
|
||||||
int character = buffer.readUnsignedByte();
|
|
||||||
if (!fitsOnOneByte(character)) {
|
|
||||||
if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == character) {
|
|
||||||
return FormatSpec.INVALID_CHARACTER;
|
|
||||||
}
|
|
||||||
character <<= 16;
|
|
||||||
character += buffer.readUnsignedShort();
|
|
||||||
}
|
|
||||||
return character;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Input methods: Read a binary dictionary to memory.
|
|
||||||
// readDictionaryBinary is the public entry point for them.
|
|
||||||
|
|
||||||
static int readChildrenAddress(final FusionDictionaryBufferInterface buffer,
|
|
||||||
final int optionFlags, final FormatOptions options) {
|
|
||||||
if (options.mSupportsDynamicUpdate) {
|
|
||||||
final int address = buffer.readUnsignedInt24();
|
|
||||||
if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
|
|
||||||
if ((address & FormatSpec.MSB24) != 0) {
|
|
||||||
return -(address & FormatSpec.SINT24_MAX);
|
|
||||||
} else {
|
|
||||||
return address;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
int address;
|
|
||||||
switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) {
|
|
||||||
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
|
|
||||||
return buffer.readUnsignedByte();
|
|
||||||
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
|
|
||||||
return buffer.readUnsignedShort();
|
|
||||||
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
|
|
||||||
return buffer.readUnsignedInt24();
|
|
||||||
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
|
|
||||||
default:
|
|
||||||
return FormatSpec.NO_CHILDREN_ADDRESS;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static int readParentAddress(final FusionDictionaryBufferInterface buffer,
|
|
||||||
final FormatOptions formatOptions) {
|
|
||||||
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
|
||||||
final int parentAddress = buffer.readUnsignedInt24();
|
|
||||||
final int sign = ((parentAddress & FormatSpec.MSB24) != 0) ? -1 : 1;
|
|
||||||
return sign * (parentAddress & FormatSpec.SINT24_MAX);
|
|
||||||
} else {
|
|
||||||
return FormatSpec.NO_PARENT_ADDRESS;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH];
|
|
||||||
public static CharGroupInfo readCharGroup(final FusionDictionaryBufferInterface buffer,
|
|
||||||
final int originalGroupAddress, final FormatOptions options) {
|
|
||||||
int addressPointer = originalGroupAddress;
|
|
||||||
final int flags = buffer.readUnsignedByte();
|
|
||||||
++addressPointer;
|
|
||||||
|
|
||||||
final int parentAddress = readParentAddress(buffer, options);
|
|
||||||
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
|
|
||||||
addressPointer += 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
final int characters[];
|
|
||||||
if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
|
|
||||||
int index = 0;
|
|
||||||
int character = CharEncoding.readChar(buffer);
|
|
||||||
addressPointer += CharEncoding.getCharSize(character);
|
|
||||||
while (-1 != character) {
|
|
||||||
// FusionDictionary is making sure that the length of the word is smaller than
|
|
||||||
// MAX_WORD_LENGTH.
|
|
||||||
// So we'll never write past the end of CHARACTER_BUFFER.
|
|
||||||
CHARACTER_BUFFER[index++] = character;
|
|
||||||
character = CharEncoding.readChar(buffer);
|
|
||||||
addressPointer += CharEncoding.getCharSize(character);
|
|
||||||
}
|
|
||||||
characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index);
|
|
||||||
} else {
|
|
||||||
final int character = CharEncoding.readChar(buffer);
|
|
||||||
addressPointer += CharEncoding.getCharSize(character);
|
|
||||||
characters = new int[] { character };
|
|
||||||
}
|
|
||||||
final int frequency;
|
|
||||||
if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
|
|
||||||
++addressPointer;
|
|
||||||
frequency = buffer.readUnsignedByte();
|
|
||||||
} else {
|
|
||||||
frequency = CharGroup.NOT_A_TERMINAL;
|
|
||||||
}
|
|
||||||
int childrenAddress = readChildrenAddress(buffer, flags, options);
|
|
||||||
if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
|
||||||
childrenAddress += addressPointer;
|
|
||||||
}
|
|
||||||
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
|
|
||||||
ArrayList<WeightedString> shortcutTargets = null;
|
|
||||||
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
|
|
||||||
final int pointerBefore = buffer.position();
|
|
||||||
shortcutTargets = new ArrayList<WeightedString>();
|
|
||||||
buffer.readUnsignedShort(); // Skip the size
|
|
||||||
while (true) {
|
|
||||||
final int targetFlags = buffer.readUnsignedByte();
|
|
||||||
final String word = CharEncoding.readString(buffer);
|
|
||||||
shortcutTargets.add(new WeightedString(word,
|
|
||||||
targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY));
|
|
||||||
if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
|
|
||||||
}
|
|
||||||
addressPointer += buffer.position() - pointerBefore;
|
|
||||||
}
|
|
||||||
ArrayList<PendingAttribute> bigrams = null;
|
|
||||||
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
|
|
||||||
bigrams = new ArrayList<PendingAttribute>();
|
|
||||||
int bigramCount = 0;
|
|
||||||
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
|
||||||
final int bigramFlags = buffer.readUnsignedByte();
|
|
||||||
++addressPointer;
|
|
||||||
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE)
|
|
||||||
? 1 : -1;
|
|
||||||
int bigramAddress = addressPointer;
|
|
||||||
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
|
|
||||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
|
|
||||||
bigramAddress += sign * buffer.readUnsignedByte();
|
|
||||||
addressPointer += 1;
|
|
||||||
break;
|
|
||||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
|
|
||||||
bigramAddress += sign * buffer.readUnsignedShort();
|
|
||||||
addressPointer += 2;
|
|
||||||
break;
|
|
||||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
|
|
||||||
final int offset = (buffer.readUnsignedByte() << 16)
|
|
||||||
+ buffer.readUnsignedShort();
|
|
||||||
bigramAddress += sign * offset;
|
|
||||||
addressPointer += 3;
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
throw new RuntimeException("Has bigrams with no address");
|
|
||||||
}
|
|
||||||
bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY,
|
|
||||||
bigramAddress));
|
|
||||||
if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
|
|
||||||
}
|
|
||||||
if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
|
||||||
MakedictLog.d("too many bigrams in a group.");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
|
|
||||||
parentAddress, childrenAddress, shortcutTargets, bigrams);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads and returns the char group count out of a buffer and forwards the pointer.
|
|
||||||
*/
|
|
||||||
public static int readCharGroupCount(final FusionDictionaryBufferInterface buffer) {
|
|
||||||
final int msb = buffer.readUnsignedByte();
|
|
||||||
if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= msb) {
|
|
||||||
return msb;
|
|
||||||
} else {
|
|
||||||
return ((FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & msb) << 8)
|
|
||||||
+ buffer.readUnsignedByte();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Finds, as a string, the word at the address passed as an argument.
|
|
||||||
*
|
|
||||||
* @param buffer the buffer to read from.
|
|
||||||
* @param headerSize the size of the header.
|
|
||||||
* @param address the address to seek.
|
|
||||||
* @param formatOptions file format options.
|
|
||||||
* @return the word with its frequency, as a weighted string.
|
|
||||||
*/
|
|
||||||
/* package for tests */ static WeightedString getWordAtAddress(
|
|
||||||
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
|
|
||||||
final FormatOptions formatOptions) {
|
|
||||||
final WeightedString result;
|
|
||||||
final int originalPointer = buffer.position();
|
|
||||||
buffer.position(address);
|
|
||||||
|
|
||||||
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
|
||||||
result = getWordAtAddressWithParentAddress(buffer, headerSize, address, formatOptions);
|
|
||||||
} else {
|
|
||||||
result = getWordAtAddressWithoutParentAddress(buffer, headerSize, address,
|
|
||||||
formatOptions);
|
|
||||||
}
|
|
||||||
|
|
||||||
buffer.position(originalPointer);
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
@SuppressWarnings("unused")
|
|
||||||
private static WeightedString getWordAtAddressWithParentAddress(
|
|
||||||
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
|
|
||||||
final FormatOptions options) {
|
|
||||||
int currentAddress = address;
|
|
||||||
int frequency = Integer.MIN_VALUE;
|
|
||||||
final StringBuilder builder = new StringBuilder();
|
|
||||||
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
|
|
||||||
for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
|
|
||||||
CharGroupInfo currentInfo;
|
|
||||||
int loopCounter = 0;
|
|
||||||
do {
|
|
||||||
buffer.position(currentAddress + headerSize);
|
|
||||||
currentInfo = readCharGroup(buffer, currentAddress, options);
|
|
||||||
if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) {
|
|
||||||
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
|
||||||
}
|
|
||||||
if (DBG && loopCounter++ > MAX_JUMPS) {
|
|
||||||
MakedictLog.d("Too many jumps - probably a bug");
|
|
||||||
}
|
|
||||||
} while (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options));
|
|
||||||
if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency;
|
|
||||||
builder.insert(0,
|
|
||||||
new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length));
|
|
||||||
if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
|
|
||||||
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
|
||||||
}
|
|
||||||
return new WeightedString(builder.toString(), frequency);
|
|
||||||
}
|
|
||||||
|
|
||||||
private static WeightedString getWordAtAddressWithoutParentAddress(
|
|
||||||
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
|
|
||||||
final FormatOptions options) {
|
|
||||||
buffer.position(headerSize);
|
|
||||||
final int count = readCharGroupCount(buffer);
|
|
||||||
int groupOffset = BinaryDictIOUtils.getGroupCountSize(count);
|
|
||||||
final StringBuilder builder = new StringBuilder();
|
|
||||||
WeightedString result = null;
|
|
||||||
|
|
||||||
CharGroupInfo last = null;
|
|
||||||
for (int i = count - 1; i >= 0; --i) {
|
|
||||||
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
|
|
||||||
groupOffset = info.mEndAddress;
|
|
||||||
if (info.mOriginalAddress == address) {
|
|
||||||
builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
|
|
||||||
result = new WeightedString(builder.toString(), info.mFrequency);
|
|
||||||
break; // and return
|
|
||||||
}
|
|
||||||
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
|
||||||
if (info.mChildrenAddress > address) {
|
|
||||||
if (null == last) continue;
|
|
||||||
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
|
|
||||||
buffer.position(last.mChildrenAddress + headerSize);
|
|
||||||
i = readCharGroupCount(buffer);
|
|
||||||
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
|
|
||||||
last = null;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
last = info;
|
|
||||||
}
|
|
||||||
if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
|
|
||||||
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
|
|
||||||
buffer.position(last.mChildrenAddress + headerSize);
|
|
||||||
i = readCharGroupCount(buffer);
|
|
||||||
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
|
|
||||||
last = null;
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return result;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads a single node array from a buffer.
|
|
||||||
*
|
|
||||||
* This methods reads the file at the current position. A node array is fully expected to start
|
|
||||||
* at the current position.
|
|
||||||
* This will recursively read other node arrays into the structure, populating the reverse
|
|
||||||
* maps on the fly and using them to keep track of already read nodes.
|
|
||||||
*
|
|
||||||
* @param buffer the buffer, correctly positioned at the start of a node array.
|
|
||||||
* @param headerSize the size, in bytes, of the file header.
|
|
||||||
* @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
|
|
||||||
* @param reverseGroupMap a mapping from addresses to already read character groups.
|
|
||||||
* @param options file format options.
|
|
||||||
* @return the read node array with all his children already read.
|
|
||||||
*/
|
|
||||||
private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer,
|
|
||||||
final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
|
|
||||||
final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
|
|
||||||
throws IOException {
|
|
||||||
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
|
|
||||||
final int nodeArrayOrigin = buffer.position() - headerSize;
|
|
||||||
|
|
||||||
do { // Scan the linked-list node.
|
|
||||||
final int nodeArrayHeadPosition = buffer.position() - headerSize;
|
|
||||||
final int count = readCharGroupCount(buffer);
|
|
||||||
int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
|
|
||||||
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
|
|
||||||
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
|
|
||||||
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
|
|
||||||
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
|
|
||||||
ArrayList<WeightedString> bigrams = null;
|
|
||||||
if (null != info.mBigrams) {
|
|
||||||
bigrams = new ArrayList<WeightedString>();
|
|
||||||
for (PendingAttribute bigram : info.mBigrams) {
|
|
||||||
final WeightedString word = getWordAtAddress(
|
|
||||||
buffer, headerSize, bigram.mAddress, options);
|
|
||||||
final int reconstructedFrequency =
|
|
||||||
reconstructBigramFrequency(word.mFrequency, bigram.mFrequency);
|
|
||||||
bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
|
||||||
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
|
|
||||||
if (null == children) {
|
|
||||||
final int currentPosition = buffer.position();
|
|
||||||
buffer.position(info.mChildrenAddress + headerSize);
|
|
||||||
children = readNodeArray(
|
|
||||||
buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options);
|
|
||||||
buffer.position(currentPosition);
|
|
||||||
}
|
|
||||||
nodeArrayContents.add(
|
|
||||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
|
||||||
info.mFrequency,
|
|
||||||
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
|
||||||
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
|
|
||||||
} else {
|
|
||||||
nodeArrayContents.add(
|
|
||||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
|
||||||
info.mFrequency,
|
|
||||||
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
|
||||||
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
|
|
||||||
}
|
|
||||||
groupOffset = info.mEndAddress;
|
|
||||||
}
|
|
||||||
|
|
||||||
// reach the end of the array.
|
|
||||||
if (options.mSupportsDynamicUpdate) {
|
|
||||||
final int nextAddress = buffer.readUnsignedInt24();
|
|
||||||
if (nextAddress >= 0 && nextAddress < buffer.limit()) {
|
|
||||||
buffer.position(nextAddress);
|
|
||||||
} else {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
} while (options.mSupportsDynamicUpdate &&
|
|
||||||
buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
|
||||||
|
|
||||||
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
|
|
||||||
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
|
|
||||||
nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
|
|
||||||
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
|
|
||||||
return nodeArray;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Helper function to get the binary format version from the header.
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
private static int getFormatVersion(final FusionDictionaryBufferInterface buffer)
|
|
||||||
throws IOException {
|
|
||||||
final int magic = buffer.readInt();
|
|
||||||
if (FormatSpec.MAGIC_NUMBER == magic) return buffer.readUnsignedShort();
|
|
||||||
return FormatSpec.NOT_A_VERSION_NUMBER;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Helper function to get and validate the binary format version.
|
|
||||||
* @throws UnsupportedFormatException
|
|
||||||
* @throws IOException
|
|
||||||
*/
|
|
||||||
static int checkFormatVersion(final FusionDictionaryBufferInterface buffer)
|
|
||||||
throws IOException, UnsupportedFormatException {
|
|
||||||
final int version = getFormatVersion(buffer);
|
|
||||||
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|
|
||||||
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
|
|
||||||
throw new UnsupportedFormatException("This file has version " + version
|
|
||||||
+ ", but this implementation does not support versions above "
|
|
||||||
+ FormatSpec.MAXIMUM_SUPPORTED_VERSION);
|
|
||||||
}
|
|
||||||
return version;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads a header from a buffer.
|
|
||||||
* @param headerReader the header reader
|
|
||||||
* @throws IOException
|
|
||||||
* @throws UnsupportedFormatException
|
|
||||||
*/
|
|
||||||
public static FileHeader readHeader(final HeaderReaderInterface headerReader)
|
|
||||||
throws IOException, UnsupportedFormatException {
|
|
||||||
final int version = headerReader.readVersion();
|
|
||||||
final int optionsFlags = headerReader.readOptionFlags();
|
|
||||||
|
|
||||||
final int headerSize = headerReader.readHeaderSize();
|
|
||||||
|
|
||||||
if (headerSize < 0) {
|
|
||||||
throw new UnsupportedFormatException("header size can't be negative.");
|
|
||||||
}
|
|
||||||
|
|
||||||
final HashMap<String, String> attributes = headerReader.readAttributes(headerSize);
|
|
||||||
|
|
||||||
final FileHeader header = new FileHeader(headerSize,
|
|
||||||
new FusionDictionary.DictionaryOptions(attributes,
|
|
||||||
0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
|
|
||||||
0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
|
|
||||||
new FormatOptions(version,
|
|
||||||
0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
|
|
||||||
return header;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads options from a buffer and populate a map with their contents.
|
|
||||||
*
|
|
||||||
* The buffer is read at the current position, so the caller must take care the pointer
|
|
||||||
* is in the right place before calling this.
|
|
||||||
*/
|
|
||||||
public static void populateOptions(final FusionDictionaryBufferInterface buffer,
|
|
||||||
final int headerSize, final HashMap<String, String> options) {
|
|
||||||
while (buffer.position() < headerSize) {
|
|
||||||
final String key = CharEncoding.readString(buffer);
|
|
||||||
final String value = CharEncoding.readString(buffer);
|
|
||||||
options.put(key, value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Reads a buffer and returns the memory representation of the dictionary.
|
|
||||||
*
|
|
||||||
* This high-level method takes a buffer and reads its contents, populating a
|
|
||||||
* FusionDictionary structure. The optional dict argument is an existing dictionary to
|
|
||||||
* which words from the buffer should be added. If it is null, a new dictionary is created.
|
|
||||||
*
|
|
||||||
* @param reader the reader.
|
|
||||||
* @param dict an optional dictionary to add words to, or null.
|
|
||||||
* @return the created (or merged) dictionary.
|
|
||||||
*/
|
|
||||||
@UsedForTesting
|
|
||||||
public static FusionDictionary readDictionaryBinary(final BinaryDictReader reader,
|
|
||||||
final FusionDictionary dict) throws FileNotFoundException, IOException,
|
|
||||||
UnsupportedFormatException {
|
|
||||||
|
|
||||||
// if the buffer has not been opened, open the buffer with bytebuffer.
|
|
||||||
if (reader.getBuffer() == null) reader.openBuffer(
|
|
||||||
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
|
||||||
if (reader.getBuffer() == null) {
|
|
||||||
MakedictLog.e("Cannot open the buffer");
|
|
||||||
}
|
|
||||||
|
|
||||||
// Read header
|
|
||||||
final FileHeader fileHeader = readHeader(reader);
|
|
||||||
|
|
||||||
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
|
|
||||||
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
|
||||||
final PtNodeArray root = readNodeArray(reader.getBuffer(), fileHeader.mHeaderSize,
|
|
||||||
reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions);
|
|
||||||
|
|
||||||
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
|
|
||||||
if (null != dict) {
|
|
||||||
for (final Word w : dict) {
|
|
||||||
if (w.mIsBlacklistEntry) {
|
|
||||||
newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
|
|
||||||
} else {
|
|
||||||
newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (final Word w : dict) {
|
|
||||||
// By construction a binary dictionary may not have bigrams pointing to
|
|
||||||
// words that are not also registered as unigrams so we don't have to avoid
|
|
||||||
// them explicitly here.
|
|
||||||
for (final WeightedString bigram : w.mBigrams) {
|
|
||||||
newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return newDict;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Helper method to pass a file name instead of a File object to isBinaryDictionary.
|
|
||||||
*/
|
|
||||||
public static boolean isBinaryDictionary(final String filename) {
|
|
||||||
final File file = new File(filename);
|
|
||||||
return isBinaryDictionary(file);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Basic test to find out whether the file is a binary dictionary or not.
|
|
||||||
*
|
|
||||||
* Concretely this only tests the magic number.
|
|
||||||
*
|
|
||||||
* @param file The file to test.
|
|
||||||
* @return true if it's a binary dictionary, false otherwise
|
|
||||||
*/
|
|
||||||
public static boolean isBinaryDictionary(final File file) {
|
|
||||||
FileInputStream inStream = null;
|
FileInputStream inStream = null;
|
||||||
try {
|
try {
|
||||||
inStream = new FileInputStream(file);
|
inStream = new FileInputStream(file);
|
||||||
final ByteBuffer buffer = inStream.getChannel().map(
|
final byte[] array = new byte[(int) file.length()];
|
||||||
FileChannel.MapMode.READ_ONLY, 0, file.length());
|
inStream.read(array);
|
||||||
final int version = getFormatVersion(new ByteBufferWrapper(buffer));
|
return new ByteArrayDictBuffer(array);
|
||||||
return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION
|
|
||||||
&& version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION);
|
|
||||||
} catch (FileNotFoundException e) {
|
|
||||||
return false;
|
|
||||||
} catch (IOException e) {
|
|
||||||
return false;
|
|
||||||
} finally {
|
} finally {
|
||||||
if (inStream != null) {
|
if (inStream != null) {
|
||||||
try {
|
|
||||||
inStream.close();
|
inStream.close();
|
||||||
} catch (IOException e) {
|
|
||||||
// do nothing
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Calculate bigram frequency from compressed value
|
* Creates DictionaryBuffer using a writable ByteBuffer and a RandomAccessFile.
|
||||||
*
|
*
|
||||||
* @param unigramFrequency
|
* This class doesn't perform as fast as other classes,
|
||||||
* @param bigramFrequency compressed frequency
|
* but this class is the only option available for destructive operations (insert or delete)
|
||||||
* @return approximate bigram frequency
|
* on a dictionary.
|
||||||
*/
|
*/
|
||||||
public static int reconstructBigramFrequency(final int unigramFrequency,
|
@UsedForTesting
|
||||||
final int bigramFrequency) {
|
public static final class DictionaryBufferFromWritableByteBufferFactory
|
||||||
final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)
|
implements DictionaryBufferFactory {
|
||||||
/ (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
|
@Override
|
||||||
final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f);
|
public DictBuffer getDictionaryBuffer(final File file)
|
||||||
return (int)resultFreqFloat;
|
throws FileNotFoundException, IOException {
|
||||||
|
RandomAccessFile raFile = null;
|
||||||
|
ByteBuffer buffer = null;
|
||||||
|
try {
|
||||||
|
raFile = new RandomAccessFile(file, "rw");
|
||||||
|
buffer = raFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, file.length());
|
||||||
|
} finally {
|
||||||
|
if (raFile != null) {
|
||||||
|
raFile.close();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (buffer != null) {
|
||||||
|
return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer);
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final File mDictionaryBinaryFile;
|
||||||
|
private DictBuffer mDictBuffer;
|
||||||
|
|
||||||
|
/**
 * Creates a decoder for the given binary dictionary file.
 *
 * No buffer is opened here; the buffer stays null until openDictBuffer is called.
 *
 * @param file the binary dictionary file to decode.
 */
public BinaryDictDecoder(final File file) {
    this.mDictBuffer = null;
    this.mDictionaryBinaryFile = file;
}
|
||||||
|
|
||||||
|
public void openDictBuffer(final DictionaryBufferFactory factory)
|
||||||
|
throws FileNotFoundException, IOException {
|
||||||
|
mDictBuffer = factory.getDictionaryBuffer(mDictionaryBinaryFile);
|
||||||
|
}
|
||||||
|
|
||||||
|
public DictBuffer getDictBuffer() {
|
||||||
|
return mDictBuffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@UsedForTesting
|
||||||
|
public DictBuffer openAndGetDictBuffer(
|
||||||
|
final DictionaryBufferFactory factory)
|
||||||
|
throws FileNotFoundException, IOException {
|
||||||
|
openDictBuffer(factory);
|
||||||
|
return getDictBuffer();
|
||||||
|
}
|
||||||
|
|
||||||
|
// The implementation of HeaderReader
|
||||||
|
@Override
|
||||||
|
public int readVersion() throws IOException, UnsupportedFormatException {
|
||||||
|
return BinaryDictDecoderUtils.checkFormatVersion(mDictBuffer);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int readOptionFlags() {
|
||||||
|
return mDictBuffer.readUnsignedShort();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int readHeaderSize() {
|
||||||
|
return mDictBuffer.readInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public HashMap<String, String> readAttributes(final int headerSize) {
|
||||||
|
final HashMap<String, String> attributes = new HashMap<String, String>();
|
||||||
|
while (mDictBuffer.position() < headerSize) {
|
||||||
|
// We can avoid an infinite loop here since mDictBuffer.position() is always increased
|
||||||
|
// by calling CharEncoding.readString.
|
||||||
|
final String key = CharEncoding.readString(mDictBuffer);
|
||||||
|
final String value = CharEncoding.readString(mDictBuffer);
|
||||||
|
attributes.put(key, value);
|
||||||
|
}
|
||||||
|
mDictBuffer.position(headerSize);
|
||||||
|
return attributes;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -0,0 +1,777 @@
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2013 The Android Open Source Project
|
||||||
|
*
|
||||||
|
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
|
* you may not use this file except in compliance with the License.
|
||||||
|
* You may obtain a copy of the License at
|
||||||
|
*
|
||||||
|
* http://www.apache.org/licenses/LICENSE-2.0
|
||||||
|
*
|
||||||
|
* Unless required by applicable law or agreed to in writing, software
|
||||||
|
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||||
|
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||||
|
* See the License for the specific language governing permissions and
|
||||||
|
* limitations under the License.
|
||||||
|
*/
|
||||||
|
|
||||||
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||||
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
import com.android.inputmethod.latin.makedict.decoder.HeaderReader;
|
||||||
|
|
||||||
|
import java.io.ByteArrayOutputStream;
|
||||||
|
import java.io.File;
|
||||||
|
import java.io.FileInputStream;
|
||||||
|
import java.io.FileNotFoundException;
|
||||||
|
import java.io.IOException;
|
||||||
|
import java.nio.ByteBuffer;
|
||||||
|
import java.nio.channels.FileChannel;
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.Arrays;
|
||||||
|
import java.util.HashMap;
|
||||||
|
import java.util.Map;
|
||||||
|
import java.util.TreeMap;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Decodes binary files for a FusionDictionary.
|
||||||
|
*
|
||||||
|
* All the methods in this class are static.
|
||||||
|
*
|
||||||
|
* TODO: Remove calls from classes except BinaryDictDecoder
|
||||||
|
* TODO: Move this file to makedict/internal.
|
||||||
|
*/
|
||||||
|
public final class BinaryDictDecoderUtils {
|
||||||
|
|
||||||
|
/** Debug switch, taken from {@link MakedictLog#DBG}. */
private static final boolean DBG = MakedictLog.DBG;

/** Number of jumps after which, in debug mode, following moved groups is logged as suspicious. */
private static final int MAX_JUMPS = 12;

/**
 * Private constructor: this utility class is not publicly instantiable.
 */
private BinaryDictDecoderUtils() {
}
|
||||||
|
|
||||||
|
@UsedForTesting
|
||||||
|
public interface DictBuffer {
|
||||||
|
public int readUnsignedByte();
|
||||||
|
public int readUnsignedShort();
|
||||||
|
public int readUnsignedInt24();
|
||||||
|
public int readInt();
|
||||||
|
public int position();
|
||||||
|
public void position(int newPosition);
|
||||||
|
public void put(final byte b);
|
||||||
|
public int limit();
|
||||||
|
@UsedForTesting
|
||||||
|
public int capacity();
|
||||||
|
}
|
||||||
|
|
||||||
|
public static final class ByteBufferDictBuffer implements DictBuffer {
|
||||||
|
private ByteBuffer mBuffer;
|
||||||
|
|
||||||
|
public ByteBufferDictBuffer(final ByteBuffer buffer) {
|
||||||
|
mBuffer = buffer;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int readUnsignedByte() {
|
||||||
|
return mBuffer.get() & 0xFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int readUnsignedShort() {
|
||||||
|
return mBuffer.getShort() & 0xFFFF;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int readUnsignedInt24() {
|
||||||
|
final int retval = readUnsignedByte();
|
||||||
|
return (retval << 16) + readUnsignedShort();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int readInt() {
|
||||||
|
return mBuffer.getInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int position() {
|
||||||
|
return mBuffer.position();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void position(int newPos) {
|
||||||
|
mBuffer.position(newPos);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void put(final byte b) {
|
||||||
|
mBuffer.put(b);
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int limit() {
|
||||||
|
return mBuffer.limit();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public int capacity() {
|
||||||
|
return mBuffer.capacity();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* A class grouping utility function for our specific character encoding.
|
||||||
|
*/
|
||||||
|
static final class CharEncoding {
|
||||||
|
private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
|
||||||
|
private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF;
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method to find out whether this code fits on one byte
|
||||||
|
*/
|
||||||
|
private static boolean fitsOnOneByte(final int character) {
|
||||||
|
return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE
|
||||||
|
&& character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the size of a character given its character code.
|
||||||
|
*
|
||||||
|
* Char format is:
|
||||||
|
* 1 byte = bbbbbbbb match
|
||||||
|
* case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
|
||||||
|
* else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
|
||||||
|
* unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
|
||||||
|
* 00011111 would be outside unicode.
|
||||||
|
* else: iso-latin-1 code
|
||||||
|
* This allows for the whole unicode range to be encoded, including chars outside of
|
||||||
|
* the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
|
||||||
|
* characters which should never happen anyway (and still work, but take 3 bytes).
|
||||||
|
*
|
||||||
|
* @param character the character code.
|
||||||
|
* @return the size in binary encoded-form, either 1 or 3 bytes.
|
||||||
|
*/
|
||||||
|
static int getCharSize(final int character) {
|
||||||
|
// See char encoding in FusionDictionary.java
|
||||||
|
if (fitsOnOneByte(character)) return 1;
|
||||||
|
if (FormatSpec.INVALID_CHARACTER == character) return 1;
|
||||||
|
return 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Compute the byte size of a character array.
|
||||||
|
*/
|
||||||
|
static int getCharArraySize(final int[] chars) {
|
||||||
|
int size = 0;
|
||||||
|
for (int character : chars) size += getCharSize(character);
|
||||||
|
return size;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a char array to a byte buffer.
|
||||||
|
*
|
||||||
|
* @param codePoints the code point array to write.
|
||||||
|
* @param buffer the byte buffer to write to.
|
||||||
|
* @param index the index in buffer to write the character array to.
|
||||||
|
* @return the index after the last character.
|
||||||
|
*/
|
||||||
|
static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) {
|
||||||
|
for (int codePoint : codePoints) {
|
||||||
|
if (1 == getCharSize(codePoint)) {
|
||||||
|
buffer[index++] = (byte)codePoint;
|
||||||
|
} else {
|
||||||
|
buffer[index++] = (byte)(0xFF & (codePoint >> 16));
|
||||||
|
buffer[index++] = (byte)(0xFF & (codePoint >> 8));
|
||||||
|
buffer[index++] = (byte)(0xFF & codePoint);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return index;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a string with our character format to a byte buffer.
|
||||||
|
*
|
||||||
|
* This will also write the terminator byte.
|
||||||
|
*
|
||||||
|
* @param buffer the byte buffer to write to.
|
||||||
|
* @param origin the offset to write from.
|
||||||
|
* @param word the string to write.
|
||||||
|
* @return the size written, in bytes.
|
||||||
|
*/
|
||||||
|
static int writeString(final byte[] buffer, final int origin,
|
||||||
|
final String word) {
|
||||||
|
final int length = word.length();
|
||||||
|
int index = origin;
|
||||||
|
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
|
||||||
|
final int codePoint = word.codePointAt(i);
|
||||||
|
if (1 == getCharSize(codePoint)) {
|
||||||
|
buffer[index++] = (byte)codePoint;
|
||||||
|
} else {
|
||||||
|
buffer[index++] = (byte)(0xFF & (codePoint >> 16));
|
||||||
|
buffer[index++] = (byte)(0xFF & (codePoint >> 8));
|
||||||
|
buffer[index++] = (byte)(0xFF & codePoint);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buffer[index++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR;
|
||||||
|
return index - origin;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Writes a string with our character format to a ByteArrayOutputStream.
|
||||||
|
*
|
||||||
|
* This will also write the terminator byte.
|
||||||
|
*
|
||||||
|
* @param buffer the ByteArrayOutputStream to write to.
|
||||||
|
* @param word the string to write.
|
||||||
|
*/
|
||||||
|
static void writeString(final ByteArrayOutputStream buffer, final String word) {
|
||||||
|
final int length = word.length();
|
||||||
|
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
|
||||||
|
final int codePoint = word.codePointAt(i);
|
||||||
|
if (1 == getCharSize(codePoint)) {
|
||||||
|
buffer.write((byte) codePoint);
|
||||||
|
} else {
|
||||||
|
buffer.write((byte) (0xFF & (codePoint >> 16)));
|
||||||
|
buffer.write((byte) (0xFF & (codePoint >> 8)));
|
||||||
|
buffer.write((byte) (0xFF & codePoint));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a string from a DictBuffer. This is the converse of the above method.
|
||||||
|
*/
|
||||||
|
static String readString(final DictBuffer dictBuffer) {
|
||||||
|
final StringBuilder s = new StringBuilder();
|
||||||
|
int character = readChar(dictBuffer);
|
||||||
|
while (character != FormatSpec.INVALID_CHARACTER) {
|
||||||
|
s.appendCodePoint(character);
|
||||||
|
character = readChar(dictBuffer);
|
||||||
|
}
|
||||||
|
return s.toString();
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a character from the buffer.
|
||||||
|
*
|
||||||
|
* This follows the character format documented earlier in this source file.
|
||||||
|
*
|
||||||
|
* @param dictBuffer the buffer, positioned over an encoded character.
|
||||||
|
* @return the character code.
|
||||||
|
*/
|
||||||
|
static int readChar(final DictBuffer dictBuffer) {
|
||||||
|
int character = dictBuffer.readUnsignedByte();
|
||||||
|
if (!fitsOnOneByte(character)) {
|
||||||
|
if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == character) {
|
||||||
|
return FormatSpec.INVALID_CHARACTER;
|
||||||
|
}
|
||||||
|
character <<= 16;
|
||||||
|
character += dictBuffer.readUnsignedShort();
|
||||||
|
}
|
||||||
|
return character;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Input methods: Read a binary dictionary to memory.
|
||||||
|
// readDictionaryBinary is the public entry point for them.
|
||||||
|
|
||||||
|
static int readChildrenAddress(final DictBuffer dictBuffer,
|
||||||
|
final int optionFlags, final FormatOptions options) {
|
||||||
|
if (options.mSupportsDynamicUpdate) {
|
||||||
|
final int address = dictBuffer.readUnsignedInt24();
|
||||||
|
if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
|
||||||
|
if ((address & FormatSpec.MSB24) != 0) {
|
||||||
|
return -(address & FormatSpec.SINT24_MAX);
|
||||||
|
} else {
|
||||||
|
return address;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
int address;
|
||||||
|
switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) {
|
||||||
|
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
|
||||||
|
return dictBuffer.readUnsignedByte();
|
||||||
|
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
|
||||||
|
return dictBuffer.readUnsignedShort();
|
||||||
|
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
|
||||||
|
return dictBuffer.readUnsignedInt24();
|
||||||
|
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
|
||||||
|
default:
|
||||||
|
return FormatSpec.NO_CHILDREN_ADDRESS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static int readParentAddress(final DictBuffer dictBuffer,
|
||||||
|
final FormatOptions formatOptions) {
|
||||||
|
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
||||||
|
final int parentAddress = dictBuffer.readUnsignedInt24();
|
||||||
|
final int sign = ((parentAddress & FormatSpec.MSB24) != 0) ? -1 : 1;
|
||||||
|
return sign * (parentAddress & FormatSpec.SINT24_MAX);
|
||||||
|
} else {
|
||||||
|
return FormatSpec.NO_PARENT_ADDRESS;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH];
|
||||||
|
public static CharGroupInfo readCharGroup(final DictBuffer dictBuffer,
|
||||||
|
final int originalGroupAddress, final FormatOptions options) {
|
||||||
|
int addressPointer = originalGroupAddress;
|
||||||
|
final int flags = dictBuffer.readUnsignedByte();
|
||||||
|
++addressPointer;
|
||||||
|
|
||||||
|
final int parentAddress = readParentAddress(dictBuffer, options);
|
||||||
|
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
|
||||||
|
addressPointer += 3;
|
||||||
|
}
|
||||||
|
|
||||||
|
final int characters[];
|
||||||
|
if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
|
||||||
|
int index = 0;
|
||||||
|
int character = CharEncoding.readChar(dictBuffer);
|
||||||
|
addressPointer += CharEncoding.getCharSize(character);
|
||||||
|
while (-1 != character) {
|
||||||
|
// FusionDictionary is making sure that the length of the word is smaller than
|
||||||
|
// MAX_WORD_LENGTH.
|
||||||
|
// So we'll never write past the end of CHARACTER_BUFFER.
|
||||||
|
CHARACTER_BUFFER[index++] = character;
|
||||||
|
character = CharEncoding.readChar(dictBuffer);
|
||||||
|
addressPointer += CharEncoding.getCharSize(character);
|
||||||
|
}
|
||||||
|
characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index);
|
||||||
|
} else {
|
||||||
|
final int character = CharEncoding.readChar(dictBuffer);
|
||||||
|
addressPointer += CharEncoding.getCharSize(character);
|
||||||
|
characters = new int[] { character };
|
||||||
|
}
|
||||||
|
final int frequency;
|
||||||
|
if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
|
||||||
|
++addressPointer;
|
||||||
|
frequency = dictBuffer.readUnsignedByte();
|
||||||
|
} else {
|
||||||
|
frequency = CharGroup.NOT_A_TERMINAL;
|
||||||
|
}
|
||||||
|
int childrenAddress = readChildrenAddress(dictBuffer, flags, options);
|
||||||
|
if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||||
|
childrenAddress += addressPointer;
|
||||||
|
}
|
||||||
|
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
|
||||||
|
ArrayList<WeightedString> shortcutTargets = null;
|
||||||
|
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
|
||||||
|
final int pointerBefore = dictBuffer.position();
|
||||||
|
shortcutTargets = new ArrayList<WeightedString>();
|
||||||
|
dictBuffer.readUnsignedShort(); // Skip the size
|
||||||
|
while (true) {
|
||||||
|
final int targetFlags = dictBuffer.readUnsignedByte();
|
||||||
|
final String word = CharEncoding.readString(dictBuffer);
|
||||||
|
shortcutTargets.add(new WeightedString(word,
|
||||||
|
targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY));
|
||||||
|
if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
|
||||||
|
}
|
||||||
|
addressPointer += dictBuffer.position() - pointerBefore;
|
||||||
|
}
|
||||||
|
ArrayList<PendingAttribute> bigrams = null;
|
||||||
|
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
|
||||||
|
bigrams = new ArrayList<PendingAttribute>();
|
||||||
|
int bigramCount = 0;
|
||||||
|
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
||||||
|
final int bigramFlags = dictBuffer.readUnsignedByte();
|
||||||
|
++addressPointer;
|
||||||
|
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE)
|
||||||
|
? 1 : -1;
|
||||||
|
int bigramAddress = addressPointer;
|
||||||
|
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
|
||||||
|
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
|
||||||
|
bigramAddress += sign * dictBuffer.readUnsignedByte();
|
||||||
|
addressPointer += 1;
|
||||||
|
break;
|
||||||
|
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
|
||||||
|
bigramAddress += sign * dictBuffer.readUnsignedShort();
|
||||||
|
addressPointer += 2;
|
||||||
|
break;
|
||||||
|
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
|
||||||
|
final int offset = (dictBuffer.readUnsignedByte() << 16)
|
||||||
|
+ dictBuffer.readUnsignedShort();
|
||||||
|
bigramAddress += sign * offset;
|
||||||
|
addressPointer += 3;
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
throw new RuntimeException("Has bigrams with no address");
|
||||||
|
}
|
||||||
|
bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY,
|
||||||
|
bigramAddress));
|
||||||
|
if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
|
||||||
|
}
|
||||||
|
if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
||||||
|
MakedictLog.d("too many bigrams in a group.");
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
|
||||||
|
parentAddress, childrenAddress, shortcutTargets, bigrams);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads and returns the char group count out of a buffer and forwards the pointer.
|
||||||
|
*/
|
||||||
|
public static int readCharGroupCount(final DictBuffer dictBuffer) {
|
||||||
|
final int msb = dictBuffer.readUnsignedByte();
|
||||||
|
if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= msb) {
|
||||||
|
return msb;
|
||||||
|
} else {
|
||||||
|
return ((FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & msb) << 8)
|
||||||
|
+ dictBuffer.readUnsignedByte();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Finds, as a string, the word at the address passed as an argument.
|
||||||
|
*
|
||||||
|
* @param dictBuffer the buffer to read from.
|
||||||
|
* @param headerSize the size of the header.
|
||||||
|
* @param address the address to seek.
|
||||||
|
* @param formatOptions file format options.
|
||||||
|
* @return the word with its frequency, as a weighted string.
|
||||||
|
*/
|
||||||
|
/* package for tests */ static WeightedString getWordAtAddress(
|
||||||
|
final DictBuffer dictBuffer, final int headerSize, final int address,
|
||||||
|
final FormatOptions formatOptions) {
|
||||||
|
final WeightedString result;
|
||||||
|
final int originalPointer = dictBuffer.position();
|
||||||
|
dictBuffer.position(address);
|
||||||
|
|
||||||
|
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
||||||
|
result = getWordAtAddressWithParentAddress(dictBuffer, headerSize, address,
|
||||||
|
formatOptions);
|
||||||
|
} else {
|
||||||
|
result = getWordAtAddressWithoutParentAddress(dictBuffer, headerSize, address,
|
||||||
|
formatOptions);
|
||||||
|
}
|
||||||
|
|
||||||
|
dictBuffer.position(originalPointer);
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
@SuppressWarnings("unused")
|
||||||
|
private static WeightedString getWordAtAddressWithParentAddress(
|
||||||
|
final DictBuffer dictBuffer, final int headerSize, final int address,
|
||||||
|
final FormatOptions options) {
|
||||||
|
int currentAddress = address;
|
||||||
|
int frequency = Integer.MIN_VALUE;
|
||||||
|
final StringBuilder builder = new StringBuilder();
|
||||||
|
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
|
||||||
|
for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
|
||||||
|
CharGroupInfo currentInfo;
|
||||||
|
int loopCounter = 0;
|
||||||
|
do {
|
||||||
|
dictBuffer.position(currentAddress + headerSize);
|
||||||
|
currentInfo = readCharGroup(dictBuffer, currentAddress, options);
|
||||||
|
if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) {
|
||||||
|
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
||||||
|
}
|
||||||
|
if (DBG && loopCounter++ > MAX_JUMPS) {
|
||||||
|
MakedictLog.d("Too many jumps - probably a bug");
|
||||||
|
}
|
||||||
|
} while (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options));
|
||||||
|
if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency;
|
||||||
|
builder.insert(0,
|
||||||
|
new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length));
|
||||||
|
if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
|
||||||
|
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
||||||
|
}
|
||||||
|
return new WeightedString(builder.toString(), frequency);
|
||||||
|
}
|
||||||
|
|
||||||
|
private static WeightedString getWordAtAddressWithoutParentAddress(
|
||||||
|
final DictBuffer dictBuffer, final int headerSize, final int address,
|
||||||
|
final FormatOptions options) {
|
||||||
|
dictBuffer.position(headerSize);
|
||||||
|
final int count = readCharGroupCount(dictBuffer);
|
||||||
|
int groupOffset = BinaryDictIOUtils.getGroupCountSize(count);
|
||||||
|
final StringBuilder builder = new StringBuilder();
|
||||||
|
WeightedString result = null;
|
||||||
|
|
||||||
|
CharGroupInfo last = null;
|
||||||
|
for (int i = count - 1; i >= 0; --i) {
|
||||||
|
CharGroupInfo info = readCharGroup(dictBuffer, groupOffset, options);
|
||||||
|
groupOffset = info.mEndAddress;
|
||||||
|
if (info.mOriginalAddress == address) {
|
||||||
|
builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
|
||||||
|
result = new WeightedString(builder.toString(), info.mFrequency);
|
||||||
|
break; // and return
|
||||||
|
}
|
||||||
|
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
||||||
|
if (info.mChildrenAddress > address) {
|
||||||
|
if (null == last) continue;
|
||||||
|
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
|
||||||
|
dictBuffer.position(last.mChildrenAddress + headerSize);
|
||||||
|
i = readCharGroupCount(dictBuffer);
|
||||||
|
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
|
||||||
|
last = null;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
last = info;
|
||||||
|
}
|
||||||
|
if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
|
||||||
|
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
|
||||||
|
dictBuffer.position(last.mChildrenAddress + headerSize);
|
||||||
|
i = readCharGroupCount(dictBuffer);
|
||||||
|
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
|
||||||
|
last = null;
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return result;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a single node array from a buffer.
|
||||||
|
*
|
||||||
|
* This methods reads the file at the current position. A node array is fully expected to start
|
||||||
|
* at the current position.
|
||||||
|
* This will recursively read other node arrays into the structure, populating the reverse
|
||||||
|
* maps on the fly and using them to keep track of already read nodes.
|
||||||
|
*
|
||||||
|
* @param dictBuffer the buffer, correctly positioned at the start of a node array.
|
||||||
|
* @param headerSize the size, in bytes, of the file header.
|
||||||
|
* @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
|
||||||
|
* @param reverseGroupMap a mapping from addresses to already read character groups.
|
||||||
|
* @param options file format options.
|
||||||
|
* @return the read node array with all his children already read.
|
||||||
|
*/
|
||||||
|
private static PtNodeArray readNodeArray(final DictBuffer dictBuffer,
|
||||||
|
final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
|
||||||
|
final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
|
||||||
|
throws IOException {
|
||||||
|
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
|
||||||
|
final int nodeArrayOrigin = dictBuffer.position() - headerSize;
|
||||||
|
|
||||||
|
do { // Scan the linked-list node.
|
||||||
|
final int nodeArrayHeadPosition = dictBuffer.position() - headerSize;
|
||||||
|
final int count = readCharGroupCount(dictBuffer);
|
||||||
|
int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
|
||||||
|
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
|
||||||
|
CharGroupInfo info = readCharGroup(dictBuffer, groupOffset, options);
|
||||||
|
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
|
||||||
|
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
|
||||||
|
ArrayList<WeightedString> bigrams = null;
|
||||||
|
if (null != info.mBigrams) {
|
||||||
|
bigrams = new ArrayList<WeightedString>();
|
||||||
|
for (PendingAttribute bigram : info.mBigrams) {
|
||||||
|
final WeightedString word = getWordAtAddress(
|
||||||
|
dictBuffer, headerSize, bigram.mAddress, options);
|
||||||
|
final int reconstructedFrequency =
|
||||||
|
BinaryDictIOUtils.reconstructBigramFrequency(word.mFrequency,
|
||||||
|
bigram.mFrequency);
|
||||||
|
bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
||||||
|
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
|
||||||
|
if (null == children) {
|
||||||
|
final int currentPosition = dictBuffer.position();
|
||||||
|
dictBuffer.position(info.mChildrenAddress + headerSize);
|
||||||
|
children = readNodeArray(dictBuffer, headerSize, reverseNodeArrayMap,
|
||||||
|
reverseGroupMap, options);
|
||||||
|
dictBuffer.position(currentPosition);
|
||||||
|
}
|
||||||
|
nodeArrayContents.add(
|
||||||
|
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
||||||
|
info.mFrequency,
|
||||||
|
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
||||||
|
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
|
||||||
|
} else {
|
||||||
|
nodeArrayContents.add(
|
||||||
|
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
||||||
|
info.mFrequency,
|
||||||
|
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
||||||
|
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
|
||||||
|
}
|
||||||
|
groupOffset = info.mEndAddress;
|
||||||
|
}
|
||||||
|
|
||||||
|
// reach the end of the array.
|
||||||
|
if (options.mSupportsDynamicUpdate) {
|
||||||
|
final int nextAddress = dictBuffer.readUnsignedInt24();
|
||||||
|
if (nextAddress >= 0 && nextAddress < dictBuffer.limit()) {
|
||||||
|
dictBuffer.position(nextAddress);
|
||||||
|
} else {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} while (options.mSupportsDynamicUpdate &&
|
||||||
|
dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||||
|
|
||||||
|
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
|
||||||
|
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
|
||||||
|
nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
|
||||||
|
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
|
||||||
|
return nodeArray;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function to get the binary format version from the header.
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
private static int getFormatVersion(final DictBuffer dictBuffer)
|
||||||
|
throws IOException {
|
||||||
|
final int magic = dictBuffer.readInt();
|
||||||
|
if (FormatSpec.MAGIC_NUMBER == magic) return dictBuffer.readUnsignedShort();
|
||||||
|
return FormatSpec.NOT_A_VERSION_NUMBER;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper function to get and validate the binary format version.
|
||||||
|
* @throws UnsupportedFormatException
|
||||||
|
* @throws IOException
|
||||||
|
*/
|
||||||
|
static int checkFormatVersion(final DictBuffer dictBuffer)
|
||||||
|
throws IOException, UnsupportedFormatException {
|
||||||
|
final int version = getFormatVersion(dictBuffer);
|
||||||
|
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|
||||||
|
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
|
||||||
|
throw new UnsupportedFormatException("This file has version " + version
|
||||||
|
+ ", but this implementation does not support versions above "
|
||||||
|
+ FormatSpec.MAXIMUM_SUPPORTED_VERSION);
|
||||||
|
}
|
||||||
|
return version;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a header from a buffer.
|
||||||
|
* @param headerReader the header reader
|
||||||
|
* @throws IOException
|
||||||
|
* @throws UnsupportedFormatException
|
||||||
|
*/
|
||||||
|
public static FileHeader readHeader(final HeaderReader headerReader)
|
||||||
|
throws IOException, UnsupportedFormatException {
|
||||||
|
final int version = headerReader.readVersion();
|
||||||
|
final int optionsFlags = headerReader.readOptionFlags();
|
||||||
|
|
||||||
|
final int headerSize = headerReader.readHeaderSize();
|
||||||
|
|
||||||
|
if (headerSize < 0) {
|
||||||
|
throw new UnsupportedFormatException("header size can't be negative.");
|
||||||
|
}
|
||||||
|
|
||||||
|
final HashMap<String, String> attributes = headerReader.readAttributes(headerSize);
|
||||||
|
|
||||||
|
final FileHeader header = new FileHeader(headerSize,
|
||||||
|
new FusionDictionary.DictionaryOptions(attributes,
|
||||||
|
0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
|
||||||
|
0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
|
||||||
|
new FormatOptions(version,
|
||||||
|
0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
|
||||||
|
return header;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads options from a buffer and populate a map with their contents.
|
||||||
|
*
|
||||||
|
* The buffer is read at the current position, so the caller must take care the pointer
|
||||||
|
* is in the right place before calling this.
|
||||||
|
*/
|
||||||
|
public static void populateOptions(final DictBuffer dictBuffer,
|
||||||
|
final int headerSize, final HashMap<String, String> options) {
|
||||||
|
while (dictBuffer.position() < headerSize) {
|
||||||
|
final String key = CharEncoding.readString(dictBuffer);
|
||||||
|
final String value = CharEncoding.readString(dictBuffer);
|
||||||
|
options.put(key, value);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Reads a buffer and returns the memory representation of the dictionary.
|
||||||
|
*
|
||||||
|
* This high-level method takes a buffer and reads its contents, populating a
|
||||||
|
* FusionDictionary structure. The optional dict argument is an existing dictionary to
|
||||||
|
* which words from the buffer should be added. If it is null, a new dictionary is created.
|
||||||
|
*
|
||||||
|
* @param dictDecoder the dict decoder.
|
||||||
|
* @param dict an optional dictionary to add words to, or null.
|
||||||
|
* @return the created (or merged) dictionary.
|
||||||
|
*/
|
||||||
|
@UsedForTesting
|
||||||
|
public static FusionDictionary readDictionaryBinary(final BinaryDictDecoder dictDecoder,
|
||||||
|
final FusionDictionary dict) throws FileNotFoundException, IOException,
|
||||||
|
UnsupportedFormatException {
|
||||||
|
|
||||||
|
// if the buffer has not been opened, open the buffer with bytebuffer.
|
||||||
|
if (dictDecoder.getDictBuffer() == null) dictDecoder.openDictBuffer(
|
||||||
|
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||||
|
if (dictDecoder.getDictBuffer() == null) {
|
||||||
|
MakedictLog.e("Cannot open the buffer");
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read header
|
||||||
|
final FileHeader fileHeader = readHeader(dictDecoder);
|
||||||
|
|
||||||
|
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
|
||||||
|
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
||||||
|
final PtNodeArray root = readNodeArray(dictDecoder.getDictBuffer(), fileHeader.mHeaderSize,
|
||||||
|
reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions);
|
||||||
|
|
||||||
|
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
|
||||||
|
if (null != dict) {
|
||||||
|
for (final Word w : dict) {
|
||||||
|
if (w.mIsBlacklistEntry) {
|
||||||
|
newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
|
||||||
|
} else {
|
||||||
|
newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
for (final Word w : dict) {
|
||||||
|
// By construction a binary dictionary may not have bigrams pointing to
|
||||||
|
// words that are not also registered as unigrams so we don't have to avoid
|
||||||
|
// them explicitly here.
|
||||||
|
for (final WeightedString bigram : w.mBigrams) {
|
||||||
|
newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return newDict;
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Helper method to pass a file name instead of a File object to isBinaryDictionary.
|
||||||
|
*/
|
||||||
|
public static boolean isBinaryDictionary(final String filename) {
|
||||||
|
final File file = new File(filename);
|
||||||
|
return isBinaryDictionary(file);
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Basic test to find out whether the file is a binary dictionary or not.
|
||||||
|
*
|
||||||
|
* Concretely this only tests the magic number.
|
||||||
|
*
|
||||||
|
* @param file The file to test.
|
||||||
|
* @return true if it's a binary dictionary, false otherwise
|
||||||
|
*/
|
||||||
|
public static boolean isBinaryDictionary(final File file) {
|
||||||
|
FileInputStream inStream = null;
|
||||||
|
try {
|
||||||
|
inStream = new FileInputStream(file);
|
||||||
|
final ByteBuffer buffer = inStream.getChannel().map(
|
||||||
|
FileChannel.MapMode.READ_ONLY, 0, file.length());
|
||||||
|
final int version = getFormatVersion(new ByteBufferDictBuffer(buffer));
|
||||||
|
return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION
|
||||||
|
&& version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION);
|
||||||
|
} catch (FileNotFoundException e) {
|
||||||
|
return false;
|
||||||
|
} catch (IOException e) {
|
||||||
|
return false;
|
||||||
|
} finally {
|
||||||
|
if (inStream != null) {
|
||||||
|
try {
|
||||||
|
inStream.close();
|
||||||
|
} catch (IOException e) {
|
||||||
|
// do nothing
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
|
@ -16,7 +16,7 @@
|
||||||
|
|
||||||
package com.android.inputmethod.latin.makedict;
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
|
|
|
@ -18,13 +18,13 @@ package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
import com.android.inputmethod.annotations.UsedForTesting;
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
import com.android.inputmethod.latin.Constants;
|
import com.android.inputmethod.latin.Constants;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
|
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
|
||||||
|
|
||||||
import java.io.File;
|
import java.io.File;
|
||||||
import java.io.FileInputStream;
|
import java.io.FileInputStream;
|
||||||
|
@ -62,7 +62,7 @@ public final class BinaryDictIOUtils {
|
||||||
* Retrieves all node arrays without recursive call.
|
* Retrieves all node arrays without recursive call.
|
||||||
*/
|
*/
|
||||||
private static void readUnigramsAndBigramsBinaryInner(
|
private static void readUnigramsAndBigramsBinaryInner(
|
||||||
final FusionDictionaryBufferInterface buffer, final int headerSize,
|
final DictBuffer dictBuffer, final int headerSize,
|
||||||
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
|
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
|
||||||
final Map<Integer, ArrayList<PendingAttribute>> bigrams,
|
final Map<Integer, ArrayList<PendingAttribute>> bigrams,
|
||||||
final FormatOptions formatOptions) {
|
final FormatOptions formatOptions) {
|
||||||
|
@ -82,11 +82,11 @@ public final class BinaryDictIOUtils {
|
||||||
p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength);
|
p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (buffer.position() != p.mAddress) buffer.position(p.mAddress);
|
if (dictBuffer.position() != p.mAddress) dictBuffer.position(p.mAddress);
|
||||||
if (index != p.mLength) index = p.mLength;
|
if (index != p.mLength) index = p.mLength;
|
||||||
|
|
||||||
if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) {
|
if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) {
|
||||||
p.mNumOfCharGroup = BinaryDictDecoder.readCharGroupCount(buffer);
|
p.mNumOfCharGroup = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
|
||||||
p.mAddress += getGroupCountSize(p.mNumOfCharGroup);
|
p.mAddress += getGroupCountSize(p.mNumOfCharGroup);
|
||||||
p.mPosition = 0;
|
p.mPosition = 0;
|
||||||
}
|
}
|
||||||
|
@ -94,7 +94,7 @@ public final class BinaryDictIOUtils {
|
||||||
stack.pop();
|
stack.pop();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
CharGroupInfo info = BinaryDictDecoder.readCharGroup(buffer,
|
CharGroupInfo info = BinaryDictDecoderUtils.readCharGroup(dictBuffer,
|
||||||
p.mAddress - headerSize, formatOptions);
|
p.mAddress - headerSize, formatOptions);
|
||||||
for (int i = 0; i < info.mCharacters.length; ++i) {
|
for (int i = 0; i < info.mCharacters.length; ++i) {
|
||||||
pushedChars[index++] = info.mCharacters[i];
|
pushedChars[index++] = info.mCharacters[i];
|
||||||
|
@ -114,7 +114,7 @@ public final class BinaryDictIOUtils {
|
||||||
|
|
||||||
if (p.mPosition == p.mNumOfCharGroup) {
|
if (p.mPosition == p.mNumOfCharGroup) {
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.mSupportsDynamicUpdate) {
|
||||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
||||||
if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
||||||
// The node array has a forward link.
|
// The node array has a forward link.
|
||||||
p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT;
|
p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT;
|
||||||
|
@ -127,7 +127,7 @@ public final class BinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// The node array has more groups.
|
// The node array has more groups.
|
||||||
p.mAddress = buffer.position();
|
p.mAddress = dictBuffer.position();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!isMovedGroup && hasChildrenAddress(info.mChildrenAddress)) {
|
if (!isMovedGroup && hasChildrenAddress(info.mChildrenAddress)) {
|
||||||
|
@ -141,20 +141,20 @@ public final class BinaryDictIOUtils {
|
||||||
* Reads unigrams and bigrams from the binary file.
|
* Reads unigrams and bigrams from the binary file.
|
||||||
* Doesn't store a full memory representation of the dictionary.
|
* Doesn't store a full memory representation of the dictionary.
|
||||||
*
|
*
|
||||||
* @param dictReader the dict reader.
|
* @param dictDecoder the dict decoder.
|
||||||
* @param words the map to store the address as a key and the word as a value.
|
* @param words the map to store the address as a key and the word as a value.
|
||||||
* @param frequencies the map to store the address as a key and the frequency as a value.
|
* @param frequencies the map to store the address as a key and the frequency as a value.
|
||||||
* @param bigrams the map to store the address as a key and the list of address as a value.
|
* @param bigrams the map to store the address as a key and the list of address as a value.
|
||||||
* @throws IOException if the file can't be read.
|
* @throws IOException if the file can't be read.
|
||||||
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
||||||
*/
|
*/
|
||||||
public static void readUnigramsAndBigramsBinary(final BinaryDictReader dictReader,
|
public static void readUnigramsAndBigramsBinary(final BinaryDictDecoder dictDecoder,
|
||||||
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
|
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
|
||||||
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
|
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
|
||||||
UnsupportedFormatException {
|
UnsupportedFormatException {
|
||||||
// Read header
|
// Read header
|
||||||
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
|
final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||||
readUnigramsAndBigramsBinaryInner(dictReader.getBuffer(), header.mHeaderSize, words,
|
readUnigramsAndBigramsBinaryInner(dictDecoder.getDictBuffer(), header.mHeaderSize, words,
|
||||||
frequencies, bigrams, header.mFormatOptions);
|
frequencies, bigrams, header.mFormatOptions);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -162,32 +162,32 @@ public final class BinaryDictIOUtils {
|
||||||
* Gets the address of the last CharGroup of the exact matching word in the dictionary.
|
* Gets the address of the last CharGroup of the exact matching word in the dictionary.
|
||||||
* If no match is found, returns NOT_VALID_WORD.
|
* If no match is found, returns NOT_VALID_WORD.
|
||||||
*
|
*
|
||||||
* @param dictReader the dict reader.
|
* @param dictDecoder the dict decoder.
|
||||||
* @param word the word we search for.
|
* @param word the word we search for.
|
||||||
* @return the address of the terminal node.
|
* @return the address of the terminal node.
|
||||||
* @throws IOException if the file can't be read.
|
* @throws IOException if the file can't be read.
|
||||||
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
||||||
*/
|
*/
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public static int getTerminalPosition(final BinaryDictReader dictReader,
|
public static int getTerminalPosition(final BinaryDictDecoder dictDecoder,
|
||||||
final String word) throws IOException, UnsupportedFormatException {
|
final String word) throws IOException, UnsupportedFormatException {
|
||||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||||
if (word == null) return FormatSpec.NOT_VALID_WORD;
|
if (word == null) return FormatSpec.NOT_VALID_WORD;
|
||||||
if (buffer.position() != 0) buffer.position(0);
|
if (dictBuffer.position() != 0) dictBuffer.position(0);
|
||||||
|
|
||||||
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
|
final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||||
int wordPos = 0;
|
int wordPos = 0;
|
||||||
final int wordLen = word.codePointCount(0, word.length());
|
final int wordLen = word.codePointCount(0, word.length());
|
||||||
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
|
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
|
||||||
if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
|
if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
|
||||||
|
|
||||||
do {
|
do {
|
||||||
final int charGroupCount = BinaryDictDecoder.readCharGroupCount(buffer);
|
final int charGroupCount = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
|
||||||
boolean foundNextCharGroup = false;
|
boolean foundNextCharGroup = false;
|
||||||
for (int i = 0; i < charGroupCount; ++i) {
|
for (int i = 0; i < charGroupCount; ++i) {
|
||||||
final int charGroupPos = buffer.position();
|
final int charGroupPos = dictBuffer.position();
|
||||||
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer,
|
final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(
|
||||||
buffer.position(), header.mFormatOptions);
|
dictBuffer, dictBuffer.position(), header.mFormatOptions);
|
||||||
final boolean isMovedGroup = isMovedGroup(currentInfo.mFlags,
|
final boolean isMovedGroup = isMovedGroup(currentInfo.mFlags,
|
||||||
header.mFormatOptions);
|
header.mFormatOptions);
|
||||||
final boolean isDeletedGroup = isDeletedGroup(currentInfo.mFlags,
|
final boolean isDeletedGroup = isDeletedGroup(currentInfo.mFlags,
|
||||||
|
@ -219,7 +219,7 @@ public final class BinaryDictIOUtils {
|
||||||
return FormatSpec.NOT_VALID_WORD;
|
return FormatSpec.NOT_VALID_WORD;
|
||||||
}
|
}
|
||||||
foundNextCharGroup = true;
|
foundNextCharGroup = true;
|
||||||
buffer.position(currentInfo.mChildrenAddress);
|
dictBuffer.position(currentInfo.mChildrenAddress);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -233,11 +233,11 @@ public final class BinaryDictIOUtils {
|
||||||
return FormatSpec.NOT_VALID_WORD;
|
return FormatSpec.NOT_VALID_WORD;
|
||||||
}
|
}
|
||||||
|
|
||||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
||||||
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
||||||
return FormatSpec.NOT_VALID_WORD;
|
return FormatSpec.NOT_VALID_WORD;
|
||||||
}
|
}
|
||||||
buffer.position(forwardLinkAddress);
|
dictBuffer.position(forwardLinkAddress);
|
||||||
} while(true);
|
} while(true);
|
||||||
}
|
}
|
||||||
return FormatSpec.NOT_VALID_WORD;
|
return FormatSpec.NOT_VALID_WORD;
|
||||||
|
@ -246,12 +246,12 @@ public final class BinaryDictIOUtils {
|
||||||
/**
|
/**
|
||||||
* @return the size written, in bytes. Always 3 bytes.
|
* @return the size written, in bytes. Always 3 bytes.
|
||||||
*/
|
*/
|
||||||
static int writeSInt24ToBuffer(final FusionDictionaryBufferInterface buffer,
|
static int writeSInt24ToBuffer(final DictBuffer dictBuffer,
|
||||||
final int value) {
|
final int value) {
|
||||||
final int absValue = Math.abs(value);
|
final int absValue = Math.abs(value);
|
||||||
buffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
|
dictBuffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
|
||||||
buffer.put((byte)((absValue >> 8) & 0xFF));
|
dictBuffer.put((byte)((absValue >> 8) & 0xFF));
|
||||||
buffer.put((byte)(absValue & 0xFF));
|
dictBuffer.put((byte)(absValue & 0xFF));
|
||||||
return 3;
|
return 3;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -289,31 +289,31 @@ public final class BinaryDictIOUtils {
|
||||||
return BinaryDictEncoder.getByteSize(value);
|
return BinaryDictEncoder.getByteSize(value);
|
||||||
}
|
}
|
||||||
|
|
||||||
static void skipCharGroup(final FusionDictionaryBufferInterface buffer,
|
static void skipCharGroup(final DictBuffer dictBuffer,
|
||||||
final FormatOptions formatOptions) {
|
final FormatOptions formatOptions) {
|
||||||
final int flags = buffer.readUnsignedByte();
|
final int flags = dictBuffer.readUnsignedByte();
|
||||||
BinaryDictDecoder.readParentAddress(buffer, formatOptions);
|
BinaryDictDecoderUtils.readParentAddress(dictBuffer, formatOptions);
|
||||||
skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
|
skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
|
||||||
BinaryDictDecoder.readChildrenAddress(buffer, flags, formatOptions);
|
BinaryDictDecoderUtils.readChildrenAddress(dictBuffer, flags, formatOptions);
|
||||||
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte();
|
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) dictBuffer.readUnsignedByte();
|
||||||
if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) {
|
if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) {
|
||||||
final int shortcutsSize = buffer.readUnsignedShort();
|
final int shortcutsSize = dictBuffer.readUnsignedShort();
|
||||||
buffer.position(buffer.position() + shortcutsSize
|
dictBuffer.position(dictBuffer.position() + shortcutsSize
|
||||||
- FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE);
|
- FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE);
|
||||||
}
|
}
|
||||||
if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) {
|
if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) {
|
||||||
int bigramCount = 0;
|
int bigramCount = 0;
|
||||||
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
||||||
final int bigramFlags = buffer.readUnsignedByte();
|
final int bigramFlags = dictBuffer.readUnsignedByte();
|
||||||
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
|
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
|
||||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
|
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
|
||||||
buffer.readUnsignedByte();
|
dictBuffer.readUnsignedByte();
|
||||||
break;
|
break;
|
||||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
|
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
|
||||||
buffer.readUnsignedShort();
|
dictBuffer.readUnsignedShort();
|
||||||
break;
|
break;
|
||||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
|
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
|
||||||
buffer.readUnsignedInt24();
|
dictBuffer.readUnsignedInt24();
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if ((bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT) == 0) break;
|
if ((bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT) == 0) break;
|
||||||
|
@ -324,15 +324,15 @@ public final class BinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void skipString(final FusionDictionaryBufferInterface buffer,
|
static void skipString(final DictBuffer dictBuffer,
|
||||||
final boolean hasMultipleChars) {
|
final boolean hasMultipleChars) {
|
||||||
if (hasMultipleChars) {
|
if (hasMultipleChars) {
|
||||||
int character = CharEncoding.readChar(buffer);
|
int character = CharEncoding.readChar(dictBuffer);
|
||||||
while (character != FormatSpec.INVALID_CHARACTER) {
|
while (character != FormatSpec.INVALID_CHARACTER) {
|
||||||
character = CharEncoding.readChar(buffer);
|
character = CharEncoding.readChar(dictBuffer);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
CharEncoding.readChar(buffer);
|
CharEncoding.readChar(dictBuffer);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -508,24 +508,25 @@ public final class BinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Find a word using the BinaryDictReader.
|
* Find a word using the BinaryDictDecoder.
|
||||||
*
|
*
|
||||||
* @param dictReader the dict reader
|
* @param dictDecoder the dict decoder
|
||||||
* @param word the word searched
|
* @param word the word searched
|
||||||
* @return the found group
|
* @return the found group
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* @throws UnsupportedFormatException
|
* @throws UnsupportedFormatException
|
||||||
*/
|
*/
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader dictReader,
|
public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictDecoder dictDecoder,
|
||||||
final String word) throws IOException, UnsupportedFormatException {
|
final String word) throws IOException, UnsupportedFormatException {
|
||||||
int position = getTerminalPosition(dictReader, word);
|
int position = getTerminalPosition(dictDecoder, word);
|
||||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||||
if (position != FormatSpec.NOT_VALID_WORD) {
|
if (position != FormatSpec.NOT_VALID_WORD) {
|
||||||
buffer.position(0);
|
dictBuffer.position(0);
|
||||||
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
|
final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||||
buffer.position(position);
|
dictBuffer.position(position);
|
||||||
return BinaryDictDecoder.readCharGroup(buffer, position, header.mFormatOptions);
|
return BinaryDictDecoderUtils.readCharGroup(dictBuffer, position,
|
||||||
|
header.mFormatOptions);
|
||||||
}
|
}
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
@ -544,21 +545,21 @@ public final class BinaryDictIOUtils {
|
||||||
final File file, final long offset, final long length)
|
final File file, final long offset, final long length)
|
||||||
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
||||||
final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE];
|
final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE];
|
||||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||||
dictReader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFactory() {
|
dictDecoder.openDictBuffer(new BinaryDictDecoder.DictionaryBufferFactory() {
|
||||||
@Override
|
@Override
|
||||||
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(File file)
|
public DictBuffer getDictionaryBuffer(File file)
|
||||||
throws FileNotFoundException, IOException {
|
throws FileNotFoundException, IOException {
|
||||||
final FileInputStream inStream = new FileInputStream(file);
|
final FileInputStream inStream = new FileInputStream(file);
|
||||||
try {
|
try {
|
||||||
inStream.read(buffer);
|
inStream.read(buffer);
|
||||||
return new ByteArrayWrapper(buffer);
|
return new ByteArrayDictBuffer(buffer);
|
||||||
} finally {
|
} finally {
|
||||||
inStream.close();
|
inStream.close();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
return BinaryDictDecoder.readHeader(dictReader);
|
return BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||||
}
|
}
|
||||||
|
|
||||||
public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset,
|
public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset,
|
||||||
|
@ -636,4 +637,19 @@ public final class BinaryDictIOUtils {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Calculate bigram frequency from compressed value
|
||||||
|
*
|
||||||
|
* @param unigramFrequency
|
||||||
|
* @param bigramFrequency compressed frequency
|
||||||
|
* @return approximate bigram frequency
|
||||||
|
*/
|
||||||
|
public static int reconstructBigramFrequency(final int unigramFrequency,
|
||||||
|
final int bigramFrequency) {
|
||||||
|
final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)
|
||||||
|
/ (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
|
||||||
|
final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f);
|
||||||
|
return (int)resultFreqFloat;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -1,169 +0,0 @@
|
||||||
/*
|
|
||||||
* Copyright (C) 2013 The Android Open Source Project
|
|
||||||
*
|
|
||||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
|
||||||
* you may not use this file except in compliance with the License.
|
|
||||||
* You may obtain a copy of the License at
|
|
||||||
*
|
|
||||||
* http://www.apache.org/licenses/LICENSE-2.0
|
|
||||||
*
|
|
||||||
* Unless required by applicable law or agreed to in writing, software
|
|
||||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
|
||||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
||||||
* See the License for the specific language governing permissions and
|
|
||||||
* limitations under the License.
|
|
||||||
*/
|
|
||||||
|
|
||||||
package com.android.inputmethod.latin.makedict;
|
|
||||||
|
|
||||||
import com.android.inputmethod.annotations.UsedForTesting;
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
|
||||||
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
|
|
||||||
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
|
|
||||||
|
|
||||||
import java.io.File;
|
|
||||||
import java.io.FileInputStream;
|
|
||||||
import java.io.FileNotFoundException;
|
|
||||||
import java.io.IOException;
|
|
||||||
import java.io.RandomAccessFile;
|
|
||||||
import java.nio.ByteBuffer;
|
|
||||||
import java.nio.channels.FileChannel;
|
|
||||||
import java.util.HashMap;
|
|
||||||
|
|
||||||
public class BinaryDictReader implements HeaderReaderInterface {
|
|
||||||
|
|
||||||
public interface FusionDictionaryBufferFactory {
|
|
||||||
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
|
|
||||||
throws FileNotFoundException, IOException;
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates FusionDictionaryBuffer from a ByteBuffer
|
|
||||||
*/
|
|
||||||
public static final class FusionDictionaryBufferFromByteBufferFactory
|
|
||||||
implements FusionDictionaryBufferFactory {
|
|
||||||
@Override
|
|
||||||
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
|
|
||||||
throws FileNotFoundException, IOException {
|
|
||||||
FileInputStream inStream = null;
|
|
||||||
ByteBuffer buffer = null;
|
|
||||||
try {
|
|
||||||
inStream = new FileInputStream(file);
|
|
||||||
buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY,
|
|
||||||
0, file.length());
|
|
||||||
} finally {
|
|
||||||
if (inStream != null) {
|
|
||||||
inStream.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (buffer != null) {
|
|
||||||
return new BinaryDictDecoder.ByteBufferWrapper(buffer);
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates FusionDictionaryBuffer from a byte array
|
|
||||||
*/
|
|
||||||
public static final class FusionDictionaryBufferFromByteArrayFactory
|
|
||||||
implements FusionDictionaryBufferFactory {
|
|
||||||
@Override
|
|
||||||
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
|
|
||||||
throws FileNotFoundException, IOException {
|
|
||||||
FileInputStream inStream = null;
|
|
||||||
try {
|
|
||||||
inStream = new FileInputStream(file);
|
|
||||||
final byte[] array = new byte[(int) file.length()];
|
|
||||||
inStream.read(array);
|
|
||||||
return new ByteArrayWrapper(array);
|
|
||||||
} finally {
|
|
||||||
if (inStream != null) {
|
|
||||||
inStream.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
|
||||||
* Creates FusionDictionaryBuffer from a RandomAccessFile.
|
|
||||||
*/
|
|
||||||
@UsedForTesting
|
|
||||||
public static final class FusionDictionaryBufferFromWritableByteBufferFactory
|
|
||||||
implements FusionDictionaryBufferFactory {
|
|
||||||
@Override
|
|
||||||
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
|
|
||||||
throws FileNotFoundException, IOException {
|
|
||||||
RandomAccessFile raFile = null;
|
|
||||||
ByteBuffer buffer = null;
|
|
||||||
try {
|
|
||||||
raFile = new RandomAccessFile(file, "rw");
|
|
||||||
buffer = raFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, file.length());
|
|
||||||
} finally {
|
|
||||||
if (raFile != null) {
|
|
||||||
raFile.close();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (buffer != null) {
|
|
||||||
return new BinaryDictDecoder.ByteBufferWrapper(buffer);
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
private final File mDictionaryBinaryFile;
|
|
||||||
private FusionDictionaryBufferInterface mFusionDictionaryBuffer;
|
|
||||||
|
|
||||||
public BinaryDictReader(final File file) {
|
|
||||||
mDictionaryBinaryFile = file;
|
|
||||||
mFusionDictionaryBuffer = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
public void openBuffer(final FusionDictionaryBufferFactory factory)
|
|
||||||
throws FileNotFoundException, IOException {
|
|
||||||
mFusionDictionaryBuffer = factory.getFusionDictionaryBuffer(mDictionaryBinaryFile);
|
|
||||||
}
|
|
||||||
|
|
||||||
public FusionDictionaryBufferInterface getBuffer() {
|
|
||||||
return mFusionDictionaryBuffer;
|
|
||||||
}
|
|
||||||
|
|
||||||
@UsedForTesting
|
|
||||||
public FusionDictionaryBufferInterface openAndGetBuffer(
|
|
||||||
final FusionDictionaryBufferFactory factory)
|
|
||||||
throws FileNotFoundException, IOException {
|
|
||||||
openBuffer(factory);
|
|
||||||
return getBuffer();
|
|
||||||
}
|
|
||||||
|
|
||||||
// The implementation of HeaderReaderInterface
|
|
||||||
@Override
|
|
||||||
public int readVersion() throws IOException, UnsupportedFormatException {
|
|
||||||
return BinaryDictDecoder.checkFormatVersion(mFusionDictionaryBuffer);
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int readOptionFlags() {
|
|
||||||
return mFusionDictionaryBuffer.readUnsignedShort();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public int readHeaderSize() {
|
|
||||||
return mFusionDictionaryBuffer.readInt();
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public HashMap<String, String> readAttributes(final int headerSize) {
|
|
||||||
final HashMap<String, String> attributes = new HashMap<String, String>();
|
|
||||||
while (mFusionDictionaryBuffer.position() < headerSize) {
|
|
||||||
// We can avoid an infinite loop here since mFusionDictionaryBuffer.position() is always increased
|
|
||||||
// by calling CharEncoding.readString.
|
|
||||||
final String key = CharEncoding.readString(mFusionDictionaryBuffer);
|
|
||||||
final String value = CharEncoding.readString(mFusionDictionaryBuffer);
|
|
||||||
attributes.put(key, value);
|
|
||||||
}
|
|
||||||
mFusionDictionaryBuffer.position(headerSize);
|
|
||||||
return attributes;
|
|
||||||
}
|
|
||||||
}
|
|
|
@ -18,7 +18,7 @@ package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
import com.android.inputmethod.annotations.UsedForTesting;
|
import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
import com.android.inputmethod.latin.Constants;
|
import com.android.inputmethod.latin.Constants;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
@ -49,142 +49,146 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
/**
|
/**
|
||||||
* Delete the word from the binary file.
|
* Delete the word from the binary file.
|
||||||
*
|
*
|
||||||
* @param dictReader the dict reader.
|
* @param dictDecoder the dict decoder.
|
||||||
* @param word the word we delete
|
* @param word the word we delete
|
||||||
* @throws IOException
|
* @throws IOException
|
||||||
* @throws UnsupportedFormatException
|
* @throws UnsupportedFormatException
|
||||||
*/
|
*/
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public static void deleteWord(final BinaryDictReader dictReader, final String word)
|
public static void deleteWord(final BinaryDictDecoder dictDecoder, final String word)
|
||||||
throws IOException, UnsupportedFormatException {
|
throws IOException, UnsupportedFormatException {
|
||||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||||
buffer.position(0);
|
dictBuffer.position(0);
|
||||||
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
|
final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||||
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictReader, word);
|
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
|
||||||
if (wordPosition == FormatSpec.NOT_VALID_WORD) return;
|
if (wordPosition == FormatSpec.NOT_VALID_WORD) return;
|
||||||
|
|
||||||
buffer.position(wordPosition);
|
dictBuffer.position(wordPosition);
|
||||||
final int flags = buffer.readUnsignedByte();
|
final int flags = dictBuffer.readUnsignedByte();
|
||||||
buffer.position(wordPosition);
|
dictBuffer.position(wordPosition);
|
||||||
buffer.put((byte)markAsDeleted(flags));
|
dictBuffer.put((byte)markAsDeleted(flags));
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update a parent address in a CharGroup that is referred to by groupOriginAddress.
|
* Update a parent address in a CharGroup that is referred to by groupOriginAddress.
|
||||||
*
|
*
|
||||||
* @param buffer the buffer to write.
|
* @param dictBuffer the DictBuffer to write.
|
||||||
* @param groupOriginAddress the address of the group.
|
* @param groupOriginAddress the address of the group.
|
||||||
* @param newParentAddress the absolute address of the parent.
|
* @param newParentAddress the absolute address of the parent.
|
||||||
* @param formatOptions file format options.
|
* @param formatOptions file format options.
|
||||||
*/
|
*/
|
||||||
public static void updateParentAddress(final FusionDictionaryBufferInterface buffer,
|
public static void updateParentAddress(final DictBuffer dictBuffer,
|
||||||
final int groupOriginAddress, final int newParentAddress,
|
final int groupOriginAddress, final int newParentAddress,
|
||||||
final FormatOptions formatOptions) {
|
final FormatOptions formatOptions) {
|
||||||
final int originalPosition = buffer.position();
|
final int originalPosition = dictBuffer.position();
|
||||||
buffer.position(groupOriginAddress);
|
dictBuffer.position(groupOriginAddress);
|
||||||
if (!formatOptions.mSupportsDynamicUpdate) {
|
if (!formatOptions.mSupportsDynamicUpdate) {
|
||||||
throw new RuntimeException("this file format does not support parent addresses");
|
throw new RuntimeException("this file format does not support parent addresses");
|
||||||
}
|
}
|
||||||
final int flags = buffer.readUnsignedByte();
|
final int flags = dictBuffer.readUnsignedByte();
|
||||||
if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) {
|
if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) {
|
||||||
// If the group is moved, the parent address is stored in the destination group.
|
// If the group is moved, the parent address is stored in the destination group.
|
||||||
// We are guaranteed to process the destination group later, so there is no need to
|
// We are guaranteed to process the destination group later, so there is no need to
|
||||||
// update anything here.
|
// update anything here.
|
||||||
buffer.position(originalPosition);
|
dictBuffer.position(originalPosition);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (DBG) {
|
if (DBG) {
|
||||||
MakedictLog.d("update parent address flags=" + flags + ", " + groupOriginAddress);
|
MakedictLog.d("update parent address flags=" + flags + ", " + groupOriginAddress);
|
||||||
}
|
}
|
||||||
final int parentOffset = newParentAddress - groupOriginAddress;
|
final int parentOffset = newParentAddress - groupOriginAddress;
|
||||||
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, parentOffset);
|
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, parentOffset);
|
||||||
buffer.position(originalPosition);
|
dictBuffer.position(originalPosition);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update parent addresses in a node array stored at nodeOriginAddress.
|
* Update parent addresses in a node array stored at nodeOriginAddress.
|
||||||
*
|
*
|
||||||
* @param buffer the buffer to be modified.
|
* @param dictBuffer the DictBuffer to be modified.
|
||||||
* @param nodeOriginAddress the address of the node array to update.
|
* @param nodeOriginAddress the address of the node array to update.
|
||||||
* @param newParentAddress the address to be written.
|
* @param newParentAddress the address to be written.
|
||||||
* @param formatOptions file format options.
|
* @param formatOptions file format options.
|
||||||
*/
|
*/
|
||||||
public static void updateParentAddresses(final FusionDictionaryBufferInterface buffer,
|
public static void updateParentAddresses(final DictBuffer dictBuffer,
|
||||||
final int nodeOriginAddress, final int newParentAddress,
|
final int nodeOriginAddress, final int newParentAddress,
|
||||||
final FormatOptions formatOptions) {
|
final FormatOptions formatOptions) {
|
||||||
final int originalPosition = buffer.position();
|
final int originalPosition = dictBuffer.position();
|
||||||
buffer.position(nodeOriginAddress);
|
dictBuffer.position(nodeOriginAddress);
|
||||||
do {
|
do {
|
||||||
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
|
final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
updateParentAddress(buffer, buffer.position(), newParentAddress, formatOptions);
|
updateParentAddress(dictBuffer, dictBuffer.position(), newParentAddress,
|
||||||
BinaryDictIOUtils.skipCharGroup(buffer, formatOptions);
|
formatOptions);
|
||||||
|
BinaryDictIOUtils.skipCharGroup(dictBuffer, formatOptions);
|
||||||
}
|
}
|
||||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
||||||
buffer.position(forwardLinkAddress);
|
dictBuffer.position(forwardLinkAddress);
|
||||||
} while (formatOptions.mSupportsDynamicUpdate
|
} while (formatOptions.mSupportsDynamicUpdate
|
||||||
&& buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
&& dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||||
buffer.position(originalPosition);
|
dictBuffer.position(originalPosition);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update a children address in a CharGroup that is addressed by groupOriginAddress.
|
* Update a children address in a CharGroup that is addressed by groupOriginAddress.
|
||||||
*
|
*
|
||||||
* @param buffer the buffer to write.
|
* @param dictBuffer the DictBuffer to write.
|
||||||
* @param groupOriginAddress the address of the group.
|
* @param groupOriginAddress the address of the group.
|
||||||
* @param newChildrenAddress the absolute address of the child.
|
* @param newChildrenAddress the absolute address of the child.
|
||||||
* @param formatOptions file format options.
|
* @param formatOptions file format options.
|
||||||
*/
|
*/
|
||||||
public static void updateChildrenAddress(final FusionDictionaryBufferInterface buffer,
|
public static void updateChildrenAddress(final DictBuffer dictBuffer,
|
||||||
final int groupOriginAddress, final int newChildrenAddress,
|
final int groupOriginAddress, final int newChildrenAddress,
|
||||||
final FormatOptions formatOptions) {
|
final FormatOptions formatOptions) {
|
||||||
final int originalPosition = buffer.position();
|
final int originalPosition = dictBuffer.position();
|
||||||
buffer.position(groupOriginAddress);
|
dictBuffer.position(groupOriginAddress);
|
||||||
final int flags = buffer.readUnsignedByte();
|
final int flags = dictBuffer.readUnsignedByte();
|
||||||
final int parentAddress = BinaryDictDecoder.readParentAddress(buffer, formatOptions);
|
final int parentAddress = BinaryDictDecoderUtils.readParentAddress(dictBuffer,
|
||||||
BinaryDictIOUtils.skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
|
formatOptions);
|
||||||
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte();
|
BinaryDictIOUtils.skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
|
||||||
|
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) dictBuffer.readUnsignedByte();
|
||||||
final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS
|
final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS
|
||||||
? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - buffer.position();
|
? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - dictBuffer.position();
|
||||||
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, childrenOffset);
|
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, childrenOffset);
|
||||||
buffer.position(originalPosition);
|
dictBuffer.position(originalPosition);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Helper method to move a char group to the tail of the file.
|
* Helper method to move a char group to the tail of the file.
|
||||||
*/
|
*/
|
||||||
private static int moveCharGroup(final OutputStream destination,
|
private static int moveCharGroup(final OutputStream destination,
|
||||||
final FusionDictionaryBufferInterface buffer, final CharGroupInfo info,
|
final DictBuffer dictBuffer, final CharGroupInfo info,
|
||||||
final int nodeArrayOriginAddress, final int oldGroupAddress,
|
final int nodeArrayOriginAddress, final int oldGroupAddress,
|
||||||
final FormatOptions formatOptions) throws IOException {
|
final FormatOptions formatOptions) throws IOException {
|
||||||
updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions);
|
updateParentAddress(dictBuffer, oldGroupAddress, dictBuffer.limit() + 1, formatOptions);
|
||||||
buffer.position(oldGroupAddress);
|
dictBuffer.position(oldGroupAddress);
|
||||||
final int currentFlags = buffer.readUnsignedByte();
|
final int currentFlags = dictBuffer.readUnsignedByte();
|
||||||
buffer.position(oldGroupAddress);
|
dictBuffer.position(oldGroupAddress);
|
||||||
buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
|
dictBuffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
|
||||||
& (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
|
& (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
|
||||||
int size = FormatSpec.GROUP_FLAGS_SIZE;
|
int size = FormatSpec.GROUP_FLAGS_SIZE;
|
||||||
updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions);
|
updateForwardLink(dictBuffer, nodeArrayOriginAddress, dictBuffer.limit(), formatOptions);
|
||||||
size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info });
|
size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info });
|
||||||
return size;
|
return size;
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("unused")
|
@SuppressWarnings("unused")
|
||||||
private static void updateForwardLink(final FusionDictionaryBufferInterface buffer,
|
private static void updateForwardLink(final DictBuffer dictBuffer,
|
||||||
final int nodeArrayOriginAddress, final int newNodeArrayAddress,
|
final int nodeArrayOriginAddress, final int newNodeArrayAddress,
|
||||||
final FormatOptions formatOptions) {
|
final FormatOptions formatOptions) {
|
||||||
buffer.position(nodeArrayOriginAddress);
|
dictBuffer.position(nodeArrayOriginAddress);
|
||||||
int jumpCount = 0;
|
int jumpCount = 0;
|
||||||
while (jumpCount++ < MAX_JUMPS) {
|
while (jumpCount++ < MAX_JUMPS) {
|
||||||
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
|
final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
|
||||||
for (int i = 0; i < count; ++i) BinaryDictIOUtils.skipCharGroup(buffer, formatOptions);
|
for (int i = 0; i < count; ++i) {
|
||||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
BinaryDictIOUtils.skipCharGroup(dictBuffer, formatOptions);
|
||||||
|
}
|
||||||
|
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
||||||
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
||||||
buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
|
dictBuffer.position(dictBuffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
|
||||||
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress);
|
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeArrayAddress);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
buffer.position(forwardLinkAddress);
|
dictBuffer.position(forwardLinkAddress);
|
||||||
}
|
}
|
||||||
if (DBG && jumpCount >= MAX_JUMPS) {
|
if (DBG && jumpCount >= MAX_JUMPS) {
|
||||||
throw new RuntimeException("too many jumps, probably a bug.");
|
throw new RuntimeException("too many jumps, probably a bug.");
|
||||||
|
@ -204,7 +208,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
* @param shortcutTargets the shortcut targets for this group.
|
* @param shortcutTargets the shortcut targets for this group.
|
||||||
* @param bigrams the bigrams for this group.
|
* @param bigrams the bigrams for this group.
|
||||||
* @param destination the stream representing the tail of the file.
|
* @param destination the stream representing the tail of the file.
|
||||||
* @param buffer the buffer representing the (constant-size) body of the file.
|
* @param dictBuffer the DictBuffer representing the (constant-size) body of the file.
|
||||||
* @param oldNodeArrayOrigin the origin of the old node array this group was a part of.
|
* @param oldNodeArrayOrigin the origin of the old node array this group was a part of.
|
||||||
* @param oldGroupOrigin the old origin where this group used to be stored.
|
* @param oldGroupOrigin the old origin where this group used to be stored.
|
||||||
* @param formatOptions format options for this dictionary.
|
* @param formatOptions format options for this dictionary.
|
||||||
|
@ -215,7 +219,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
final int length, final int flags, final int frequency, final int parentAddress,
|
final int length, final int flags, final int frequency, final int parentAddress,
|
||||||
final ArrayList<WeightedString> shortcutTargets,
|
final ArrayList<WeightedString> shortcutTargets,
|
||||||
final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
|
final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
|
||||||
final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin,
|
final DictBuffer dictBuffer, final int oldNodeArrayOrigin,
|
||||||
final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException {
|
final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException {
|
||||||
int size = 0;
|
int size = 0;
|
||||||
final int newGroupOrigin = fileEndAddress + 1;
|
final int newGroupOrigin = fileEndAddress + 1;
|
||||||
|
@ -228,7 +232,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
flags, writtenCharacters, frequency, parentAddress,
|
flags, writtenCharacters, frequency, parentAddress,
|
||||||
fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
|
fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
|
||||||
bigrams);
|
bigrams);
|
||||||
moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin,
|
moveCharGroup(destination, dictBuffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin,
|
||||||
formatOptions);
|
formatOptions);
|
||||||
return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||||
}
|
}
|
||||||
|
@ -236,7 +240,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
/**
|
/**
|
||||||
* Insert a word into a binary dictionary.
|
* Insert a word into a binary dictionary.
|
||||||
*
|
*
|
||||||
* @param dictReader the dict reader.
|
* @param dictDecoder the dict decoder.
|
||||||
* @param destination a stream to the underlying file, with the pointer at the end of the file.
|
* @param destination a stream to the underlying file, with the pointer at the end of the file.
|
||||||
* @param word the word to insert.
|
* @param word the word to insert.
|
||||||
* @param frequency the frequency of the new word.
|
* @param frequency the frequency of the new word.
|
||||||
|
@ -249,16 +253,17 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
// TODO: Support batch insertion.
|
// TODO: Support batch insertion.
|
||||||
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
|
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
|
||||||
@UsedForTesting
|
@UsedForTesting
|
||||||
public static void insertWord(final BinaryDictReader dictReader, final OutputStream destination,
|
public static void insertWord(final BinaryDictDecoder dictDecoder,
|
||||||
final String word, final int frequency, final ArrayList<WeightedString> bigramStrings,
|
final OutputStream destination, final String word, final int frequency,
|
||||||
|
final ArrayList<WeightedString> bigramStrings,
|
||||||
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
|
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
|
||||||
final boolean isBlackListEntry)
|
final boolean isBlackListEntry)
|
||||||
throws IOException, UnsupportedFormatException {
|
throws IOException, UnsupportedFormatException {
|
||||||
final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>();
|
final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>();
|
||||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||||
if (bigramStrings != null) {
|
if (bigramStrings != null) {
|
||||||
for (final WeightedString bigram : bigramStrings) {
|
for (final WeightedString bigram : bigramStrings) {
|
||||||
int position = BinaryDictIOUtils.getTerminalPosition(dictReader, bigram.mWord);
|
int position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, bigram.mWord);
|
||||||
if (position == FormatSpec.NOT_VALID_WORD) {
|
if (position == FormatSpec.NOT_VALID_WORD) {
|
||||||
// TODO: figure out what is the correct thing to do here.
|
// TODO: figure out what is the correct thing to do here.
|
||||||
} else {
|
} else {
|
||||||
|
@ -272,24 +277,24 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
final boolean hasShortcuts = shortcuts != null && !shortcuts.isEmpty();
|
final boolean hasShortcuts = shortcuts != null && !shortcuts.isEmpty();
|
||||||
|
|
||||||
// find the insert position of the word.
|
// find the insert position of the word.
|
||||||
if (buffer.position() != 0) buffer.position(0);
|
if (dictBuffer.position() != 0) dictBuffer.position(0);
|
||||||
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
|
final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||||
|
|
||||||
int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position();
|
int wordPos = 0, address = dictBuffer.position(), nodeOriginAddress = dictBuffer.position();
|
||||||
final int[] codePoints = FusionDictionary.getCodePoints(word);
|
final int[] codePoints = FusionDictionary.getCodePoints(word);
|
||||||
final int wordLen = codePoints.length;
|
final int wordLen = codePoints.length;
|
||||||
|
|
||||||
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
|
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
|
||||||
if (wordPos >= wordLen) break;
|
if (wordPos >= wordLen) break;
|
||||||
nodeOriginAddress = buffer.position();
|
nodeOriginAddress = dictBuffer.position();
|
||||||
int nodeParentAddress = -1;
|
int nodeParentAddress = -1;
|
||||||
final int charGroupCount = BinaryDictDecoder.readCharGroupCount(buffer);
|
final int charGroupCount = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
|
||||||
boolean foundNextGroup = false;
|
boolean foundNextGroup = false;
|
||||||
|
|
||||||
for (int i = 0; i < charGroupCount; ++i) {
|
for (int i = 0; i < charGroupCount; ++i) {
|
||||||
address = buffer.position();
|
address = dictBuffer.position();
|
||||||
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer,
|
final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(dictBuffer,
|
||||||
buffer.position(), fileHeader.mFormatOptions);
|
dictBuffer.position(), fileHeader.mFormatOptions);
|
||||||
final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags,
|
final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags,
|
||||||
fileHeader.mFormatOptions);
|
fileHeader.mFormatOptions);
|
||||||
if (isMovedGroup) continue;
|
if (isMovedGroup) continue;
|
||||||
|
@ -308,18 +313,18 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
* after
|
* after
|
||||||
* abc - d - ef
|
* abc - d - ef
|
||||||
*/
|
*/
|
||||||
final int newNodeAddress = buffer.limit();
|
final int newNodeAddress = dictBuffer.limit();
|
||||||
final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1,
|
final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1,
|
||||||
isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */,
|
isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */,
|
||||||
false /* isBlackListEntry */, fileHeader.mFormatOptions);
|
false /* isBlackListEntry */, fileHeader.mFormatOptions);
|
||||||
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags,
|
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags,
|
||||||
frequency, nodeParentAddress, shortcuts, bigrams, destination,
|
frequency, nodeParentAddress, shortcuts, bigrams, destination,
|
||||||
buffer, nodeOriginAddress, address, fileHeader.mFormatOptions);
|
dictBuffer, nodeOriginAddress, address, fileHeader.mFormatOptions);
|
||||||
|
|
||||||
final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p,
|
final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p,
|
||||||
currentInfo.mCharacters.length);
|
currentInfo.mCharacters.length);
|
||||||
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||||
updateParentAddresses(buffer, currentInfo.mChildrenAddress,
|
updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress,
|
||||||
newNodeAddress + written + 1, fileHeader.mFormatOptions);
|
newNodeAddress + written + 1, fileHeader.mFormatOptions);
|
||||||
}
|
}
|
||||||
final CharGroupInfo newInfo2 = new CharGroupInfo(
|
final CharGroupInfo newInfo2 = new CharGroupInfo(
|
||||||
|
@ -344,7 +349,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
* - c
|
* - c
|
||||||
*/
|
*/
|
||||||
|
|
||||||
final int newNodeAddress = buffer.limit();
|
final int newNodeAddress = dictBuffer.limit();
|
||||||
final int childrenAddress = currentInfo.mChildrenAddress;
|
final int childrenAddress = currentInfo.mChildrenAddress;
|
||||||
|
|
||||||
// move prefix
|
// move prefix
|
||||||
|
@ -355,13 +360,13 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
fileHeader.mFormatOptions);
|
fileHeader.mFormatOptions);
|
||||||
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p,
|
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p,
|
||||||
prefixFlags, -1 /* frequency */, nodeParentAddress, null, null,
|
prefixFlags, -1 /* frequency */, nodeParentAddress, null, null,
|
||||||
destination, buffer, nodeOriginAddress, address,
|
destination, dictBuffer, nodeOriginAddress, address,
|
||||||
fileHeader.mFormatOptions);
|
fileHeader.mFormatOptions);
|
||||||
|
|
||||||
final int[] suffixCharacters = Arrays.copyOfRange(
|
final int[] suffixCharacters = Arrays.copyOfRange(
|
||||||
currentInfo.mCharacters, p, currentInfo.mCharacters.length);
|
currentInfo.mCharacters, p, currentInfo.mCharacters.length);
|
||||||
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||||
updateParentAddresses(buffer, currentInfo.mChildrenAddress,
|
updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress,
|
||||||
newNodeAddress + written + 1, fileHeader.mFormatOptions);
|
newNodeAddress + written + 1, fileHeader.mFormatOptions);
|
||||||
}
|
}
|
||||||
final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags(
|
final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags(
|
||||||
|
@ -403,7 +408,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
if (wordPos + currentInfo.mCharacters.length == wordLen) {
|
if (wordPos + currentInfo.mCharacters.length == wordLen) {
|
||||||
// the word exists in the dictionary.
|
// the word exists in the dictionary.
|
||||||
// only update group.
|
// only update group.
|
||||||
final int newNodeAddress = buffer.limit();
|
final int newNodeAddress = dictBuffer.limit();
|
||||||
final boolean hasMultipleChars = currentInfo.mCharacters.length > 1;
|
final boolean hasMultipleChars = currentInfo.mCharacters.length > 1;
|
||||||
final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars,
|
final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars,
|
||||||
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
|
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
|
||||||
|
@ -412,7 +417,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
-1 /* endAddress */, flags, currentInfo.mCharacters, frequency,
|
-1 /* endAddress */, flags, currentInfo.mCharacters, frequency,
|
||||||
nodeParentAddress, currentInfo.mChildrenAddress, shortcuts,
|
nodeParentAddress, currentInfo.mChildrenAddress, shortcuts,
|
||||||
bigrams);
|
bigrams);
|
||||||
moveCharGroup(destination, buffer, newInfo, nodeOriginAddress, address,
|
moveCharGroup(destination, dictBuffer, newInfo, nodeOriginAddress, address,
|
||||||
fileHeader.mFormatOptions);
|
fileHeader.mFormatOptions);
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
@ -430,8 +435,8 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
* after
|
* after
|
||||||
* ab - cd - e
|
* ab - cd - e
|
||||||
*/
|
*/
|
||||||
final int newNodeAddress = buffer.limit();
|
final int newNodeAddress = dictBuffer.limit();
|
||||||
updateChildrenAddress(buffer, address, newNodeAddress,
|
updateChildrenAddress(dictBuffer, address, newNodeAddress,
|
||||||
fileHeader.mFormatOptions);
|
fileHeader.mFormatOptions);
|
||||||
final int newGroupAddress = newNodeAddress + 1;
|
final int newGroupAddress = newNodeAddress + 1;
|
||||||
final boolean hasMultipleChars = (wordLen - wordPos) > 1;
|
final boolean hasMultipleChars = (wordLen - wordPos) > 1;
|
||||||
|
@ -445,7 +450,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo });
|
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo });
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
buffer.position(currentInfo.mChildrenAddress);
|
dictBuffer.position(currentInfo.mChildrenAddress);
|
||||||
foundNextGroup = true;
|
foundNextGroup = true;
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
@ -454,8 +459,8 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
if (foundNextGroup) continue;
|
if (foundNextGroup) continue;
|
||||||
|
|
||||||
// reached the end of the array.
|
// reached the end of the array.
|
||||||
final int linkAddressPosition = buffer.position();
|
final int linkAddressPosition = dictBuffer.position();
|
||||||
int nextLink = buffer.readUnsignedInt24();
|
int nextLink = dictBuffer.readUnsignedInt24();
|
||||||
if ((nextLink & FormatSpec.MSB24) != 0) {
|
if ((nextLink & FormatSpec.MSB24) != 0) {
|
||||||
nextLink = -(nextLink & FormatSpec.SINT24_MAX);
|
nextLink = -(nextLink & FormatSpec.SINT24_MAX);
|
||||||
}
|
}
|
||||||
|
@ -475,9 +480,9 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
*/
|
*/
|
||||||
|
|
||||||
// change the forward link address.
|
// change the forward link address.
|
||||||
final int newNodeAddress = buffer.limit();
|
final int newNodeAddress = dictBuffer.limit();
|
||||||
buffer.position(linkAddressPosition);
|
dictBuffer.position(linkAddressPosition);
|
||||||
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress);
|
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeAddress);
|
||||||
|
|
||||||
final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
|
final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
|
||||||
final int flags = BinaryDictEncoder.makeCharGroupFlags(characters.length > 1,
|
final int flags = BinaryDictEncoder.makeCharGroupFlags(characters.length > 1,
|
||||||
|
@ -490,7 +495,7 @@ public final class DynamicBinaryDictIOUtils {
|
||||||
return;
|
return;
|
||||||
} else {
|
} else {
|
||||||
depth--;
|
depth--;
|
||||||
buffer.position(nextLink);
|
dictBuffer.position(nextLink);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -24,7 +24,7 @@ import java.util.HashMap;
|
||||||
/**
|
/**
|
||||||
* An interface to read a binary dictionary file header.
|
* An interface to read a binary dictionary file header.
|
||||||
*/
|
*/
|
||||||
public interface HeaderReaderInterface {
|
public interface HeaderReader {
|
||||||
public int readVersion() throws IOException, UnsupportedFormatException;
|
public int readVersion() throws IOException, UnsupportedFormatException;
|
||||||
public int readOptionFlags();
|
public int readOptionFlags();
|
||||||
public int readHeaderSize();
|
public int readHeaderSize();
|
|
@ -28,7 +28,7 @@ import com.android.inputmethod.latin.ExpandableDictionary;
|
||||||
import com.android.inputmethod.latin.LatinImeLogger;
|
import com.android.inputmethod.latin.LatinImeLogger;
|
||||||
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
||||||
import com.android.inputmethod.latin.WordComposer;
|
import com.android.inputmethod.latin.WordComposer;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.settings.Settings;
|
import com.android.inputmethod.latin.settings.Settings;
|
||||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||||
|
@ -241,10 +241,10 @@ public abstract class DynamicPredictionDictionaryBase extends ExpandableDictiona
|
||||||
};
|
};
|
||||||
|
|
||||||
// Load the dictionary from binary file
|
// Load the dictionary from binary file
|
||||||
final BinaryDictReader reader = new BinaryDictReader(
|
final BinaryDictDecoder reader = new BinaryDictDecoder(
|
||||||
new File(getContext().getFilesDir(), fileName));
|
new File(getContext().getFilesDir(), fileName));
|
||||||
try {
|
try {
|
||||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
|
reader.openDictBuffer(new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
|
||||||
UserHistoryDictIOUtils.readDictionaryBinary(reader, listener);
|
UserHistoryDictIOUtils.readDictionaryBinary(reader, listener);
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
// This is an expected condition: we don't have a user history dictionary for this
|
// This is an expected condition: we don't have a user history dictionary for this
|
||||||
|
|
|
@ -16,17 +16,17 @@
|
||||||
|
|
||||||
package com.android.inputmethod.latin.utils;
|
package com.android.inputmethod.latin.utils;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* This class provides an implementation for the FusionDictionary buffer interface that is backed
|
* This class provides an implementation for the FusionDictionary buffer interface that is backed
|
||||||
* by a simpled byte array. It allows to create a binary dictionary in memory.
|
* by a simpled byte array. It allows to create a binary dictionary in memory.
|
||||||
*/
|
*/
|
||||||
public final class ByteArrayWrapper implements FusionDictionaryBufferInterface {
|
public final class ByteArrayDictBuffer implements DictBuffer {
|
||||||
private byte[] mBuffer;
|
private byte[] mBuffer;
|
||||||
private int mPosition;
|
private int mPosition;
|
||||||
|
|
||||||
public ByteArrayWrapper(final byte[] buffer) {
|
public ByteArrayDictBuffer(final byte[] buffer) {
|
||||||
mBuffer = buffer;
|
mBuffer = buffer;
|
||||||
mPosition = 0;
|
mPosition = 0;
|
||||||
}
|
}
|
|
@ -22,7 +22,6 @@ import com.android.inputmethod.annotations.UsedForTesting;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
|
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
|
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
|
@ -119,13 +118,13 @@ public final class UserHistoryDictIOUtils {
|
||||||
/**
|
/**
|
||||||
* Reads dictionary from file.
|
* Reads dictionary from file.
|
||||||
*/
|
*/
|
||||||
public static void readDictionaryBinary(final BinaryDictReader reader,
|
public static void readDictionaryBinary(final BinaryDictDecoder dictDecoder,
|
||||||
final OnAddWordListener dict) {
|
final OnAddWordListener dict) {
|
||||||
final Map<Integer, String> unigrams = CollectionUtils.newTreeMap();
|
final Map<Integer, String> unigrams = CollectionUtils.newTreeMap();
|
||||||
final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
|
final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
|
||||||
final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
|
final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
|
||||||
try {
|
try {
|
||||||
BinaryDictIOUtils.readUnigramsAndBigramsBinary(reader, unigrams, frequencies,
|
BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, unigrams, frequencies,
|
||||||
bigrams);
|
bigrams);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
Log.e(TAG, "IO exception while reading file", e);
|
Log.e(TAG, "IO exception while reading file", e);
|
||||||
|
@ -157,7 +156,7 @@ public final class UserHistoryDictIOUtils {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
to.setBigram(word1, word2,
|
to.setBigram(word1, word2,
|
||||||
BinaryDictDecoder.reconstructBigramFrequency(unigramFrequency,
|
BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency,
|
||||||
attr.mFrequency));
|
attr.mFrequency));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -22,7 +22,7 @@ import android.test.suitebuilder.annotation.LargeTest;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
import android.util.SparseArray;
|
import android.util.SparseArray;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
|
@ -44,7 +44,7 @@ import java.util.Random;
|
||||||
import java.util.Set;
|
import java.util.Set;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unit tests for BinaryDictDecoder and BinaryDictEncoder.
|
* Unit tests for BinaryDictDecoderUtils and BinaryDictEncoder.
|
||||||
*/
|
*/
|
||||||
@LargeTest
|
@LargeTest
|
||||||
public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
|
@ -118,14 +118,16 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
// Utilities for test
|
// Utilities for test
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Makes new buffer according to BUFFER_TYPE.
|
* Makes new DictBuffer according to BUFFER_TYPE.
|
||||||
*/
|
*/
|
||||||
private void getBuffer(final BinaryDictReader reader, final int bufferType)
|
private void getDictBuffer(final BinaryDictDecoder dictDecoder, final int bufferType)
|
||||||
throws FileNotFoundException, IOException {
|
throws FileNotFoundException, IOException {
|
||||||
if (bufferType == USE_BYTE_BUFFER) {
|
if (bufferType == USE_BYTE_BUFFER) {
|
||||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
dictDecoder.openDictBuffer(
|
||||||
|
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||||
} else if (bufferType == USE_BYTE_ARRAY) {
|
} else if (bufferType == USE_BYTE_ARRAY) {
|
||||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
|
dictDecoder.openDictBuffer(
|
||||||
|
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -269,14 +271,14 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap,
|
final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap,
|
||||||
final int bufferType) {
|
final int bufferType) {
|
||||||
long now, diff = -1;
|
long now, diff = -1;
|
||||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||||
|
|
||||||
FusionDictionary dict = null;
|
FusionDictionary dict = null;
|
||||||
try {
|
try {
|
||||||
getBuffer(reader, bufferType);
|
getDictBuffer(dictDecoder, bufferType);
|
||||||
assertNotNull(reader.getBuffer());
|
assertNotNull(dictDecoder.getDictBuffer());
|
||||||
now = System.currentTimeMillis();
|
now = System.currentTimeMillis();
|
||||||
dict = BinaryDictDecoder.readDictionaryBinary(reader, null);
|
dict = BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null);
|
||||||
diff = System.currentTimeMillis() - now;
|
diff = System.currentTimeMillis() - now;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
Log.e(TAG, "IOException while reading dictionary", e);
|
Log.e(TAG, "IOException while reading dictionary", e);
|
||||||
|
@ -388,7 +390,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
actBigrams.get(word1).add(word2);
|
actBigrams.get(word1).add(word2);
|
||||||
|
|
||||||
final int bigramFreq = BinaryDictDecoder.reconstructBigramFrequency(
|
final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
|
||||||
unigramFreq, attr.mFrequency);
|
unigramFreq, attr.mFrequency);
|
||||||
assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
|
assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
|
||||||
}
|
}
|
||||||
|
@ -407,12 +409,12 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap();
|
final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap();
|
||||||
|
|
||||||
long now = -1, diff = -1;
|
long now = -1, diff = -1;
|
||||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||||
try {
|
try {
|
||||||
getBuffer(reader, bufferType);
|
getDictBuffer(dictDecoder, bufferType);
|
||||||
assertNotNull("Can't get buffer.", reader.getBuffer());
|
assertNotNull("Can't get buffer.", dictDecoder.getDictBuffer());
|
||||||
now = System.currentTimeMillis();
|
now = System.currentTimeMillis();
|
||||||
BinaryDictIOUtils.readUnigramsAndBigramsBinary(reader, resultWords, resultFreqs,
|
BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, resultWords, resultFreqs,
|
||||||
resultBigrams);
|
resultBigrams);
|
||||||
diff = System.currentTimeMillis() - now;
|
diff = System.currentTimeMillis() - now;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
@ -497,31 +499,31 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Tests for getTerminalPosition
|
// Tests for getTerminalPosition
|
||||||
private String getWordFromBinary(final BinaryDictReader dictReader, final int address) {
|
private String getWordFromBinary(final BinaryDictDecoder dictDecoder, final int address) {
|
||||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||||
if (buffer.position() != 0) buffer.position(0);
|
if (dictBuffer.position() != 0) dictBuffer.position(0);
|
||||||
|
|
||||||
FileHeader fileHeader = null;
|
FileHeader fileHeader = null;
|
||||||
try {
|
try {
|
||||||
fileHeader = BinaryDictDecoder.readHeader(dictReader);
|
fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
return null;
|
return null;
|
||||||
} catch (UnsupportedFormatException e) {
|
} catch (UnsupportedFormatException e) {
|
||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
if (fileHeader == null) return null;
|
if (fileHeader == null) return null;
|
||||||
return BinaryDictDecoder.getWordAtAddress(buffer, fileHeader.mHeaderSize,
|
return BinaryDictDecoderUtils.getWordAtAddress(dictBuffer, fileHeader.mHeaderSize,
|
||||||
address - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord;
|
address - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord;
|
||||||
}
|
}
|
||||||
|
|
||||||
private long runGetTerminalPosition(final BinaryDictReader reader, final String word, int index,
|
private long runGetTerminalPosition(final BinaryDictDecoder dictDecoder, final String word,
|
||||||
boolean contained) {
|
int index, boolean contained) {
|
||||||
final int expectedFrequency = (UNIGRAM_FREQ + index) % 255;
|
final int expectedFrequency = (UNIGRAM_FREQ + index) % 255;
|
||||||
long diff = -1;
|
long diff = -1;
|
||||||
int position = -1;
|
int position = -1;
|
||||||
try {
|
try {
|
||||||
final long now = System.nanoTime();
|
final long now = System.nanoTime();
|
||||||
position = BinaryDictIOUtils.getTerminalPosition(reader, word);
|
position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
|
||||||
diff = System.nanoTime() - now;
|
diff = System.nanoTime() - now;
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
Log.e(TAG, "IOException while getTerminalPosition", e);
|
Log.e(TAG, "IOException while getTerminalPosition", e);
|
||||||
|
@ -530,7 +532,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
|
assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
|
||||||
if (contained) assertEquals(getWordFromBinary(reader, position), word);
|
if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word);
|
||||||
return diff;
|
return diff;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -550,28 +552,29 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
||||||
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
|
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
|
||||||
|
|
||||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||||
try {
|
try {
|
||||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
|
dictDecoder.openDictBuffer(
|
||||||
|
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// ignore
|
// ignore
|
||||||
Log.e(TAG, "IOException while opening the buffer", e);
|
Log.e(TAG, "IOException while opening the buffer", e);
|
||||||
}
|
}
|
||||||
assertNotNull("Can't get the buffer", reader.getBuffer());
|
assertNotNull("Can't get the buffer", dictDecoder.getDictBuffer());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// too long word
|
// too long word
|
||||||
final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
|
final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
|
||||||
assertEquals(FormatSpec.NOT_VALID_WORD,
|
assertEquals(FormatSpec.NOT_VALID_WORD,
|
||||||
BinaryDictIOUtils.getTerminalPosition(reader, longWord));
|
BinaryDictIOUtils.getTerminalPosition(dictDecoder, longWord));
|
||||||
|
|
||||||
// null
|
// null
|
||||||
assertEquals(FormatSpec.NOT_VALID_WORD,
|
assertEquals(FormatSpec.NOT_VALID_WORD,
|
||||||
BinaryDictIOUtils.getTerminalPosition(reader, null));
|
BinaryDictIOUtils.getTerminalPosition(dictDecoder, null));
|
||||||
|
|
||||||
// empty string
|
// empty string
|
||||||
assertEquals(FormatSpec.NOT_VALID_WORD,
|
assertEquals(FormatSpec.NOT_VALID_WORD,
|
||||||
BinaryDictIOUtils.getTerminalPosition(reader, ""));
|
BinaryDictIOUtils.getTerminalPosition(dictDecoder, ""));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
} catch (UnsupportedFormatException e) {
|
} catch (UnsupportedFormatException e) {
|
||||||
}
|
}
|
||||||
|
@ -579,7 +582,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
// Test a word that is contained within the dictionary.
|
// Test a word that is contained within the dictionary.
|
||||||
long sum = 0;
|
long sum = 0;
|
||||||
for (int i = 0; i < sWords.size(); ++i) {
|
for (int i = 0; i < sWords.size(); ++i) {
|
||||||
final long time = runGetTerminalPosition(reader, sWords.get(i), i, true);
|
final long time = runGetTerminalPosition(dictDecoder, sWords.get(i), i, true);
|
||||||
sum += time == -1 ? 0 : time;
|
sum += time == -1 ? 0 : time;
|
||||||
}
|
}
|
||||||
Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000));
|
Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000));
|
||||||
|
@ -590,7 +593,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
for (int i = 0; i < 1000; ++i) {
|
for (int i = 0; i < 1000; ++i) {
|
||||||
final String word = generateWord(random, codePointSet);
|
final String word = generateWord(random, codePointSet);
|
||||||
if (sWords.indexOf(word) != -1) continue;
|
if (sWords.indexOf(word) != -1) continue;
|
||||||
runGetTerminalPosition(reader, word, i, false);
|
runGetTerminalPosition(dictDecoder, word, i, false);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -610,28 +613,28 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||||
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
||||||
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
|
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
|
||||||
|
|
||||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||||
try {
|
try {
|
||||||
reader.openBuffer(
|
dictDecoder.openDictBuffer(
|
||||||
new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
|
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
// ignore
|
// ignore
|
||||||
Log.e(TAG, "IOException while opening the buffer", e);
|
Log.e(TAG, "IOException while opening the buffer", e);
|
||||||
}
|
}
|
||||||
assertNotNull("Can't get the buffer", reader.getBuffer());
|
assertNotNull("Can't get the buffer", dictDecoder.getDictBuffer());
|
||||||
|
|
||||||
try {
|
try {
|
||||||
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
|
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
|
||||||
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0)));
|
BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(0)));
|
||||||
DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(0));
|
DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(0));
|
||||||
assertEquals(FormatSpec.NOT_VALID_WORD,
|
assertEquals(FormatSpec.NOT_VALID_WORD,
|
||||||
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0)));
|
BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(0)));
|
||||||
|
|
||||||
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
|
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
|
||||||
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5)));
|
BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(5)));
|
||||||
DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(5));
|
DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(5));
|
||||||
assertEquals(FormatSpec.NOT_VALID_WORD,
|
assertEquals(FormatSpec.NOT_VALID_WORD,
|
||||||
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5)));
|
BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(5)));
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
} catch (UnsupportedFormatException e) {
|
} catch (UnsupportedFormatException e) {
|
||||||
}
|
}
|
||||||
|
|
|
@ -16,14 +16,14 @@
|
||||||
|
|
||||||
package com.android.inputmethod.latin.makedict;
|
package com.android.inputmethod.latin.makedict;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictReader.FusionDictionaryBufferFactory;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.DictionaryBufferFactory;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictReader.
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
|
||||||
FusionDictionaryBufferFromByteArrayFactory;
|
DictionaryBufferFromByteArrayFactory;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictReader.
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
|
||||||
FusionDictionaryBufferFromByteBufferFactory;
|
DictionaryBufferFromReadOnlyByteBufferFactory;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictReader.
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
|
||||||
FusionDictionaryBufferFromWritableByteBufferFactory;
|
DictionaryBufferFromWritableByteBufferFactory;
|
||||||
|
|
||||||
import android.test.AndroidTestCase;
|
import android.test.AndroidTestCase;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
|
@ -33,10 +33,10 @@ import java.io.FileOutputStream;
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Unit tests for BinaryDictReader
|
* Unit tests for BinaryDictDecoder
|
||||||
*/
|
*/
|
||||||
public class BinaryDictReaderTests extends AndroidTestCase {
|
public class BinaryDictDecoderTests extends AndroidTestCase {
|
||||||
private static final String TAG = BinaryDictReaderTests.class.getSimpleName();
|
private static final String TAG = BinaryDictDecoderTests.class.getSimpleName();
|
||||||
|
|
||||||
private final byte[] data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
|
private final byte[] data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ public class BinaryDictReaderTests extends AndroidTestCase {
|
||||||
|
|
||||||
@SuppressWarnings("null")
|
@SuppressWarnings("null")
|
||||||
public void runTestOpenBuffer(final String testName,
|
public void runTestOpenBuffer(final String testName,
|
||||||
final FusionDictionaryBufferFactory factory) {
|
final DictionaryBufferFactory factory) {
|
||||||
File testFile = null;
|
File testFile = null;
|
||||||
try {
|
try {
|
||||||
testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
|
testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
|
||||||
|
@ -70,9 +70,9 @@ public class BinaryDictReaderTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
assertNotNull(testFile);
|
assertNotNull(testFile);
|
||||||
final BinaryDictReader reader = new BinaryDictReader(testFile);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(testFile);
|
||||||
try {
|
try {
|
||||||
reader.openBuffer(factory);
|
dictDecoder.openDictBuffer(factory);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Log.e(TAG, "Failed to open the buffer", e);
|
Log.e(TAG, "Failed to open the buffer", e);
|
||||||
}
|
}
|
||||||
|
@ -80,32 +80,32 @@ public class BinaryDictReaderTests extends AndroidTestCase {
|
||||||
writeDataToFile(testFile);
|
writeDataToFile(testFile);
|
||||||
|
|
||||||
try {
|
try {
|
||||||
reader.openBuffer(factory);
|
dictDecoder.openDictBuffer(factory);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
Log.e(TAG, "Raised the exception while opening buffer", e);
|
Log.e(TAG, "Raised the exception while opening buffer", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
assertEquals(testFile.length(), reader.getBuffer().capacity());
|
assertEquals(testFile.length(), dictDecoder.getDictBuffer().capacity());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testOpenBufferWithByteBuffer() {
|
public void testOpenBufferWithByteBuffer() {
|
||||||
runTestOpenBuffer("testOpenBufferWithByteBuffer",
|
runTestOpenBuffer("testOpenBufferWithByteBuffer",
|
||||||
new FusionDictionaryBufferFromByteBufferFactory());
|
new DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testOpenBufferWithByteArray() {
|
public void testOpenBufferWithByteArray() {
|
||||||
runTestOpenBuffer("testOpenBufferWithByteArray",
|
runTestOpenBuffer("testOpenBufferWithByteArray",
|
||||||
new FusionDictionaryBufferFromByteArrayFactory());
|
new DictionaryBufferFromByteArrayFactory());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testOpenBufferWithWritableByteBuffer() {
|
public void testOpenBufferWithWritableByteBuffer() {
|
||||||
runTestOpenBuffer("testOpenBufferWithWritableByteBuffer",
|
runTestOpenBuffer("testOpenBufferWithWritableByteBuffer",
|
||||||
new FusionDictionaryBufferFromWritableByteBufferFactory());
|
new DictionaryBufferFromWritableByteBufferFactory());
|
||||||
}
|
}
|
||||||
|
|
||||||
@SuppressWarnings("null")
|
@SuppressWarnings("null")
|
||||||
public void runTestGetBuffer(final String testName,
|
public void runTestGetBuffer(final String testName,
|
||||||
final FusionDictionaryBufferFactory factory) {
|
final DictionaryBufferFactory factory) {
|
||||||
File testFile = null;
|
File testFile = null;
|
||||||
try {
|
try {
|
||||||
testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
|
testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
|
||||||
|
@ -113,40 +113,41 @@ public class BinaryDictReaderTests extends AndroidTestCase {
|
||||||
Log.e(TAG, "IOException while the creating temporary file", e);
|
Log.e(TAG, "IOException while the creating temporary file", e);
|
||||||
}
|
}
|
||||||
|
|
||||||
final BinaryDictReader reader = new BinaryDictReader(testFile);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(testFile);
|
||||||
|
|
||||||
// the default return value of getBuffer() must be null.
|
// the default return value of getBuffer() must be null.
|
||||||
assertNull("the default return value of getBuffer() is not null", reader.getBuffer());
|
assertNull("the default return value of getBuffer() is not null",
|
||||||
|
dictDecoder.getDictBuffer());
|
||||||
|
|
||||||
writeDataToFile(testFile);
|
writeDataToFile(testFile);
|
||||||
assertTrue(testFile.exists());
|
assertTrue(testFile.exists());
|
||||||
Log.d(TAG, "file length = " + testFile.length());
|
Log.d(TAG, "file length = " + testFile.length());
|
||||||
|
|
||||||
FusionDictionaryBufferInterface buffer = null;
|
DictBuffer dictBuffer = null;
|
||||||
try {
|
try {
|
||||||
buffer = reader.openAndGetBuffer(factory);
|
dictBuffer = dictDecoder.openAndGetDictBuffer(factory);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
Log.e(TAG, "Failed to open and get the buffer", e);
|
Log.e(TAG, "Failed to open and get the buffer", e);
|
||||||
}
|
}
|
||||||
assertNotNull("the buffer must not be null", buffer);
|
assertNotNull("the buffer must not be null", dictBuffer);
|
||||||
|
|
||||||
for (int i = 0; i < data.length; ++i) {
|
for (int i = 0; i < data.length; ++i) {
|
||||||
assertEquals(data[i], buffer.readUnsignedByte());
|
assertEquals(data[i], dictBuffer.readUnsignedByte());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGetBufferWithByteBuffer() {
|
public void testGetBufferWithByteBuffer() {
|
||||||
runTestGetBuffer("testGetBufferWithByteBuffer",
|
runTestGetBuffer("testGetBufferWithByteBuffer",
|
||||||
new FusionDictionaryBufferFromByteBufferFactory());
|
new DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGetBufferWithByteArray() {
|
public void testGetBufferWithByteArray() {
|
||||||
runTestGetBuffer("testGetBufferWithByteArray",
|
runTestGetBuffer("testGetBufferWithByteArray",
|
||||||
new FusionDictionaryBufferFromByteArrayFactory());
|
new DictionaryBufferFromByteArrayFactory());
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGetBufferWithWritableByteBuffer() {
|
public void testGetBufferWithWritableByteBuffer() {
|
||||||
runTestGetBuffer("testGetBufferWithWritableByteBuffer",
|
runTestGetBuffer("testGetBufferWithWritableByteBuffer",
|
||||||
new FusionDictionaryBufferFromWritableByteBufferFactory());
|
new DictionaryBufferFromWritableByteBufferFactory());
|
||||||
}
|
}
|
||||||
}
|
}
|
|
@ -21,9 +21,9 @@ import android.test.MoreAsserts;
|
||||||
import android.test.suitebuilder.annotation.LargeTest;
|
import android.test.suitebuilder.annotation.LargeTest;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictReader.
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
|
||||||
FusionDictionaryBufferFromWritableByteBufferFactory;
|
DictionaryBufferFromWritableByteBufferFactory;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
@ -112,26 +112,26 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
Log.d(TAG, " end address = " + info.mEndAddress);
|
Log.d(TAG, " end address = " + info.mEndAddress);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void printNode(final FusionDictionaryBufferInterface buffer,
|
private static void printNode(final DictBuffer dictBuffer,
|
||||||
final FormatSpec.FormatOptions formatOptions) {
|
final FormatSpec.FormatOptions formatOptions) {
|
||||||
Log.d(TAG, "Node at " + buffer.position());
|
Log.d(TAG, "Node at " + dictBuffer.position());
|
||||||
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
|
final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
|
||||||
Log.d(TAG, " charGroupCount = " + count);
|
Log.d(TAG, " charGroupCount = " + count);
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer,
|
final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(dictBuffer,
|
||||||
buffer.position(), formatOptions);
|
dictBuffer.position(), formatOptions);
|
||||||
printCharGroup(currentInfo);
|
printCharGroup(currentInfo);
|
||||||
}
|
}
|
||||||
if (formatOptions.mSupportsDynamicUpdate) {
|
if (formatOptions.mSupportsDynamicUpdate) {
|
||||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
||||||
Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
|
Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void printBinaryFile(final BinaryDictReader dictReader)
|
private static void printBinaryFile(final BinaryDictDecoder dictDecoder)
|
||||||
throws IOException, UnsupportedFormatException {
|
throws IOException, UnsupportedFormatException {
|
||||||
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
|
final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
final DictBuffer buffer = dictDecoder.getDictBuffer();
|
||||||
while (buffer.position() < buffer.limit()) {
|
while (buffer.position() < buffer.limit()) {
|
||||||
printNode(buffer, fileHeader.mFormatOptions);
|
printNode(buffer, fileHeader.mFormatOptions);
|
||||||
}
|
}
|
||||||
|
@ -139,13 +139,13 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
|
|
||||||
private int getWordPosition(final File file, final String word) {
|
private int getWordPosition(final File file, final String word) {
|
||||||
int position = FormatSpec.NOT_VALID_WORD;
|
int position = FormatSpec.NOT_VALID_WORD;
|
||||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||||
FileInputStream inStream = null;
|
FileInputStream inStream = null;
|
||||||
try {
|
try {
|
||||||
inStream = new FileInputStream(file);
|
inStream = new FileInputStream(file);
|
||||||
dictReader.openBuffer(
|
dictDecoder.openDictBuffer(
|
||||||
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||||
position = BinaryDictIOUtils.getTerminalPosition(dictReader, word);
|
position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
} catch (UnsupportedFormatException e) {
|
} catch (UnsupportedFormatException e) {
|
||||||
} finally {
|
} finally {
|
||||||
|
@ -161,12 +161,12 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private CharGroupInfo findWordFromFile(final File file, final String word) {
|
private CharGroupInfo findWordFromFile(final File file, final String word) {
|
||||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||||
CharGroupInfo info = null;
|
CharGroupInfo info = null;
|
||||||
try {
|
try {
|
||||||
dictReader.openBuffer(
|
dictDecoder.openDictBuffer(
|
||||||
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||||
info = BinaryDictIOUtils.findWordByBinaryDictReader(dictReader, word);
|
info = BinaryDictIOUtils.findWordByBinaryDictReader(dictDecoder, word);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
} catch (UnsupportedFormatException e) {
|
} catch (UnsupportedFormatException e) {
|
||||||
}
|
}
|
||||||
|
@ -177,18 +177,18 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
private long insertAndCheckWord(final File file, final String word, final int frequency,
|
private long insertAndCheckWord(final File file, final String word, final int frequency,
|
||||||
final boolean exist, final ArrayList<WeightedString> bigrams,
|
final boolean exist, final ArrayList<WeightedString> bigrams,
|
||||||
final ArrayList<WeightedString> shortcuts) {
|
final ArrayList<WeightedString> shortcuts) {
|
||||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||||
BufferedOutputStream outStream = null;
|
BufferedOutputStream outStream = null;
|
||||||
long amountOfTime = -1;
|
long amountOfTime = -1;
|
||||||
try {
|
try {
|
||||||
dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
|
dictDecoder.openDictBuffer(new DictionaryBufferFromWritableByteBufferFactory());
|
||||||
outStream = new BufferedOutputStream(new FileOutputStream(file, true));
|
outStream = new BufferedOutputStream(new FileOutputStream(file, true));
|
||||||
|
|
||||||
if (!exist) {
|
if (!exist) {
|
||||||
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
|
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
|
||||||
}
|
}
|
||||||
final long now = System.nanoTime();
|
final long now = System.nanoTime();
|
||||||
DynamicBinaryDictIOUtils.insertWord(dictReader, outStream, word, frequency, bigrams,
|
DynamicBinaryDictIOUtils.insertWord(dictDecoder, outStream, word, frequency, bigrams,
|
||||||
shortcuts, false, false);
|
shortcuts, false, false);
|
||||||
amountOfTime = System.nanoTime() - now;
|
amountOfTime = System.nanoTime() - now;
|
||||||
outStream.flush();
|
outStream.flush();
|
||||||
|
@ -211,23 +211,23 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
||||||
}
|
}
|
||||||
|
|
||||||
private void deleteWord(final File file, final String word) {
|
private void deleteWord(final File file, final String word) {
|
||||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||||
try {
|
try {
|
||||||
dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
|
dictDecoder.openDictBuffer(new DictionaryBufferFromWritableByteBufferFactory());
|
||||||
DynamicBinaryDictIOUtils.deleteWord(dictReader, word);
|
DynamicBinaryDictIOUtils.deleteWord(dictDecoder, word);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
} catch (UnsupportedFormatException e) {
|
} catch (UnsupportedFormatException e) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private void checkReverseLookup(final File file, final String word, final int position) {
|
private void checkReverseLookup(final File file, final String word, final int position) {
|
||||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||||
try {
|
try {
|
||||||
final FusionDictionaryBufferInterface buffer = dictReader.openAndGetBuffer(
|
final DictBuffer dictBuffer = dictDecoder.openAndGetDictBuffer(
|
||||||
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||||
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
|
final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||||
assertEquals(word,
|
assertEquals(word,
|
||||||
BinaryDictDecoder.getWordAtAddress(dictReader.getBuffer(),
|
BinaryDictDecoderUtils.getWordAtAddress(dictDecoder.getDictBuffer(),
|
||||||
fileHeader.mHeaderSize, position - fileHeader.mHeaderSize,
|
fileHeader.mHeaderSize, position - fileHeader.mHeaderSize,
|
||||||
fileHeader.mFormatOptions).mWord);
|
fileHeader.mFormatOptions).mWord);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
|
|
@ -21,7 +21,7 @@ import android.test.AndroidTestCase;
|
||||||
import android.test.suitebuilder.annotation.LargeTest;
|
import android.test.suitebuilder.annotation.LargeTest;
|
||||||
import android.util.Log;
|
import android.util.Log;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||||
|
@ -147,15 +147,16 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
|
||||||
}
|
}
|
||||||
|
|
||||||
private void readDictFromFile(final File file, final OnAddWordListener listener) {
|
private void readDictFromFile(final File file, final OnAddWordListener listener) {
|
||||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||||
try {
|
try {
|
||||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
|
dictDecoder.openDictBuffer(
|
||||||
|
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
|
||||||
} catch (FileNotFoundException e) {
|
} catch (FileNotFoundException e) {
|
||||||
Log.e(TAG, "file not found", e);
|
Log.e(TAG, "file not found", e);
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
Log.e(TAG, "IOException", e);
|
Log.e(TAG, "IOException", e);
|
||||||
}
|
}
|
||||||
UserHistoryDictIOUtils.readDictionaryBinary(reader, listener);
|
UserHistoryDictIOUtils.readDictionaryBinary(dictDecoder, listener);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void testGenerateFusionDictionary() {
|
public void testGenerateFusionDictionary() {
|
||||||
|
|
|
@ -28,7 +28,7 @@ LATINIME_ANNOTATIONS_SOURCE_DIRECTORY := $(LATINIME_BASE_SOURCE_DIRECTORY)/annot
|
||||||
LATINIME_CORE_SOURCE_DIRECTORY := $(LATINIME_BASE_SOURCE_DIRECTORY)/latin
|
LATINIME_CORE_SOURCE_DIRECTORY := $(LATINIME_BASE_SOURCE_DIRECTORY)/latin
|
||||||
MAKEDICT_CORE_SOURCE_DIRECTORY := $(LATINIME_CORE_SOURCE_DIRECTORY)/makedict
|
MAKEDICT_CORE_SOURCE_DIRECTORY := $(LATINIME_CORE_SOURCE_DIRECTORY)/makedict
|
||||||
USED_TARGETTED_UTILS := \
|
USED_TARGETTED_UTILS := \
|
||||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayWrapper.java \
|
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
|
||||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
|
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
|
||||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java
|
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java
|
||||||
|
|
||||||
|
|
|
@ -16,8 +16,8 @@
|
||||||
|
|
||||||
package com.android.inputmethod.latin.dicttool;
|
package com.android.inputmethod.latin.dicttool;
|
||||||
|
|
||||||
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||||
|
|
||||||
|
@ -97,7 +97,7 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
// over and over, ending in a stack overflow. Hence we limit the depth at which we try
|
// over and over, ending in a stack overflow. Hence we limit the depth at which we try
|
||||||
// decoding the file.
|
// decoding the file.
|
||||||
if (depth > MAX_DECODE_DEPTH) return null;
|
if (depth > MAX_DECODE_DEPTH) return null;
|
||||||
if (BinaryDictDecoder.isBinaryDictionary(src)) {
|
if (BinaryDictDecoderUtils.isBinaryDictionary(src)) {
|
||||||
spec.mFile = src;
|
spec.mFile = src;
|
||||||
return spec;
|
return spec;
|
||||||
}
|
}
|
||||||
|
@ -184,15 +184,15 @@ public final class BinaryDictOffdeviceUtils {
|
||||||
crash(filename, new RuntimeException(
|
crash(filename, new RuntimeException(
|
||||||
filename + " does not seem to be a dictionary file"));
|
filename + " does not seem to be a dictionary file"));
|
||||||
} else {
|
} else {
|
||||||
final BinaryDictReader reader = new BinaryDictReader(decodedSpec.mFile);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(decodedSpec.mFile);
|
||||||
reader.openBuffer(
|
dictDecoder.openDictBuffer(
|
||||||
new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
|
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
|
||||||
if (report) {
|
if (report) {
|
||||||
System.out.println("Format : Binary dictionary format");
|
System.out.println("Format : Binary dictionary format");
|
||||||
System.out.println("Packaging : " + decodedSpec.describeChain());
|
System.out.println("Packaging : " + decodedSpec.describeChain());
|
||||||
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
|
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
|
||||||
}
|
}
|
||||||
return BinaryDictDecoder.readDictionaryBinary(reader, null);
|
return BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
} catch (IOException e) {
|
} catch (IOException e) {
|
||||||
|
|
|
@ -16,9 +16,9 @@
|
||||||
|
|
||||||
package com.android.inputmethod.latin.dicttool;
|
package com.android.inputmethod.latin.dicttool;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
|
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.MakedictLog;
|
import com.android.inputmethod.latin.makedict.MakedictLog;
|
||||||
|
@ -176,7 +176,7 @@ public class DictionaryMaker {
|
||||||
inputUnigramXml = filename;
|
inputUnigramXml = filename;
|
||||||
} else if (CombinedInputOutput.isCombinedDictionary(filename)) {
|
} else if (CombinedInputOutput.isCombinedDictionary(filename)) {
|
||||||
inputCombined = filename;
|
inputCombined = filename;
|
||||||
} else if (BinaryDictDecoder.isBinaryDictionary(filename)) {
|
} else if (BinaryDictDecoderUtils.isBinaryDictionary(filename)) {
|
||||||
inputBinary = filename;
|
inputBinary = filename;
|
||||||
} else {
|
} else {
|
||||||
throw new IllegalArgumentException(
|
throw new IllegalArgumentException(
|
||||||
|
@ -198,7 +198,7 @@ public class DictionaryMaker {
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
if (null == inputBinary && null == inputUnigramXml) {
|
if (null == inputBinary && null == inputUnigramXml) {
|
||||||
if (BinaryDictDecoder.isBinaryDictionary(arg)) {
|
if (BinaryDictDecoderUtils.isBinaryDictionary(arg)) {
|
||||||
inputBinary = arg;
|
inputBinary = arg;
|
||||||
} else if (CombinedInputOutput.isCombinedDictionary(arg)) {
|
} else if (CombinedInputOutput.isCombinedDictionary(arg)) {
|
||||||
inputCombined = arg;
|
inputCombined = arg;
|
||||||
|
@ -266,9 +266,10 @@ public class DictionaryMaker {
|
||||||
private static FusionDictionary readBinaryFile(final String binaryFilename)
|
private static FusionDictionary readBinaryFile(final String binaryFilename)
|
||||||
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
||||||
final File file = new File(binaryFilename);
|
final File file = new File(binaryFilename);
|
||||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
dictDecoder.openDictBuffer(
|
||||||
return BinaryDictDecoder.readDictionaryBinary(reader, null);
|
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||||
|
return BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
|
|
@ -17,8 +17,8 @@
|
||||||
package com.android.inputmethod.latin.dicttool;
|
package com.android.inputmethod.latin.dicttool;
|
||||||
|
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||||
|
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
|
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
|
||||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||||
|
@ -67,9 +67,10 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
||||||
assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step);
|
assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step);
|
||||||
}
|
}
|
||||||
assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size());
|
assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size());
|
||||||
final BinaryDictReader reader = new BinaryDictReader(decodeSpec.mFile);
|
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(decodeSpec.mFile);
|
||||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
dictDecoder.openDictBuffer(
|
||||||
final FusionDictionary resultDict = BinaryDictDecoder.readDictionaryBinary(reader,
|
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||||
|
final FusionDictionary resultDict = BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder,
|
||||||
null /* dict : an optional dictionary to add words to, or null */);
|
null /* dict : an optional dictionary to add words to, or null */);
|
||||||
assertEquals("Dictionary can't be read back correctly",
|
assertEquals("Dictionary can't be read back correctly",
|
||||||
FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(),
|
FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(),
|
||||||
|
|
Loading…
Reference in New Issue