[Refactor] Rename BinaryDictReader and BinaryDictDecoder.
BinaryDictReader -> BinaryDictDecoder. BinaryDictDecoder -> BinaryDictDecoderUtils. Change-Id: Iadf2153b379b760538ecda488dda4f17225e5f37
This commit is contained in:
parent
63155dfa77
commit
77bce05e6f
19 changed files with 1231 additions and 1202 deletions
|
@ -21,7 +21,7 @@ import android.content.SharedPreferences;
|
|||
import android.content.res.AssetFileDescriptor;
|
||||
import android.util.Log;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||
import com.android.inputmethod.latin.utils.DictionaryInfoUtils;
|
||||
|
@ -231,17 +231,17 @@ final public class BinaryDictionaryGetter {
|
|||
try {
|
||||
// Read the version of the file
|
||||
inStream = new FileInputStream(f);
|
||||
final BinaryDictDecoder.ByteBufferWrapper buffer =
|
||||
new BinaryDictDecoder.ByteBufferWrapper(inStream.getChannel().map(
|
||||
final BinaryDictDecoderUtils.ByteBufferDictBuffer dictBuffer =
|
||||
new BinaryDictDecoderUtils.ByteBufferDictBuffer(inStream.getChannel().map(
|
||||
FileChannel.MapMode.READ_ONLY, 0, f.length()));
|
||||
final int magic = buffer.readInt();
|
||||
final int magic = dictBuffer.readInt();
|
||||
if (magic != FormatSpec.MAGIC_NUMBER) {
|
||||
return false;
|
||||
}
|
||||
final int formatVersion = buffer.readInt();
|
||||
final int headerSize = buffer.readInt();
|
||||
final int formatVersion = dictBuffer.readInt();
|
||||
final int headerSize = dictBuffer.readInt();
|
||||
final HashMap<String, String> options = CollectionUtils.newHashMap();
|
||||
BinaryDictDecoder.populateOptions(buffer, headerSize, options);
|
||||
BinaryDictDecoderUtils.populateOptions(dictBuffer, headerSize, options);
|
||||
|
||||
final String version = options.get(VERSION_KEY);
|
||||
if (null == version) {
|
||||
|
|
|
@ -17,35 +17,23 @@
|
|||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||
import com.android.inputmethod.latin.makedict.decoder.HeaderReader;
|
||||
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
|
||||
import com.android.inputmethod.latin.utils.JniUtils;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Decodes binary files for a FusionDictionary.
|
||||
*
|
||||
* All the methods in this class are static.
|
||||
*/
|
||||
public final class BinaryDictDecoder {
|
||||
|
||||
private static final boolean DBG = MakedictLog.DBG;
|
||||
@UsedForTesting
|
||||
public class BinaryDictDecoder implements HeaderReader {
|
||||
|
||||
static {
|
||||
JniUtils.loadNativeLibrary();
|
||||
|
@ -54,742 +42,148 @@ public final class BinaryDictDecoder {
|
|||
// TODO: implement something sensical instead of just a phony method
|
||||
private static native int doNothing();
|
||||
|
||||
private BinaryDictDecoder() {
|
||||
// This utility class is not publicly instantiable.
|
||||
}
|
||||
|
||||
private static final int MAX_JUMPS = 12;
|
||||
|
||||
@UsedForTesting
|
||||
public interface FusionDictionaryBufferInterface {
|
||||
public int readUnsignedByte();
|
||||
public int readUnsignedShort();
|
||||
public int readUnsignedInt24();
|
||||
public int readInt();
|
||||
public int position();
|
||||
public void position(int newPosition);
|
||||
public void put(final byte b);
|
||||
public int limit();
|
||||
@UsedForTesting
|
||||
public int capacity();
|
||||
}
|
||||
|
||||
public static final class ByteBufferWrapper implements FusionDictionaryBufferInterface {
|
||||
private ByteBuffer mBuffer;
|
||||
|
||||
public ByteBufferWrapper(final ByteBuffer buffer) {
|
||||
mBuffer = buffer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readUnsignedByte() {
|
||||
return mBuffer.get() & 0xFF;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readUnsignedShort() {
|
||||
return mBuffer.getShort() & 0xFFFF;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readUnsignedInt24() {
|
||||
final int retval = readUnsignedByte();
|
||||
return (retval << 16) + readUnsignedShort();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readInt() {
|
||||
return mBuffer.getInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int position() {
|
||||
return mBuffer.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void position(int newPos) {
|
||||
mBuffer.position(newPos);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void put(final byte b) {
|
||||
mBuffer.put(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int limit() {
|
||||
return mBuffer.limit();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int capacity() {
|
||||
return mBuffer.capacity();
|
||||
}
|
||||
public interface DictionaryBufferFactory {
|
||||
public DictBuffer getDictionaryBuffer(final File file)
|
||||
throws FileNotFoundException, IOException;
|
||||
}
|
||||
|
||||
/**
|
||||
* A class grouping utility functions for our specific character encoding.
|
||||
*/
|
||||
static final class CharEncoding {
|
||||
private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
|
||||
private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF;
|
||||
|
||||
/**
|
||||
* Helper method to find out whether this code fits on one byte
|
||||
*/
|
||||
private static boolean fitsOnOneByte(final int character) {
|
||||
return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE
|
||||
&& character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the size of a character given its character code.
|
||||
*
|
||||
* Char format is:
|
||||
* 1 byte = bbbbbbbb match
|
||||
* case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
|
||||
* else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
|
||||
* unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
|
||||
* 00011111 would be outside unicode.
|
||||
* else: iso-latin-1 code
|
||||
* This allows for the whole unicode range to be encoded, including chars outside of
|
||||
* the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
|
||||
* characters which should never happen anyway (and still work, but take 3 bytes).
|
||||
*
|
||||
* @param character the character code.
|
||||
* @return the size in binary encoded-form, either 1 or 3 bytes.
|
||||
*/
|
||||
static int getCharSize(final int character) {
|
||||
// See char encoding in FusionDictionary.java
|
||||
if (fitsOnOneByte(character)) return 1;
|
||||
if (FormatSpec.INVALID_CHARACTER == character) return 1;
|
||||
return 3;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the byte size of a character array.
|
||||
*/
|
||||
static int getCharArraySize(final int[] chars) {
|
||||
int size = 0;
|
||||
for (int character : chars) size += getCharSize(character);
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a char array to a byte buffer.
|
||||
*
|
||||
* @param codePoints the code point array to write.
|
||||
* @param buffer the byte buffer to write to.
|
||||
* @param index the index in buffer to write the character array to.
|
||||
* @return the index after the last character.
|
||||
*/
|
||||
static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) {
|
||||
for (int codePoint : codePoints) {
|
||||
if (1 == getCharSize(codePoint)) {
|
||||
buffer[index++] = (byte)codePoint;
|
||||
} else {
|
||||
buffer[index++] = (byte)(0xFF & (codePoint >> 16));
|
||||
buffer[index++] = (byte)(0xFF & (codePoint >> 8));
|
||||
buffer[index++] = (byte)(0xFF & codePoint);
|
||||
}
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a string with our character format to a byte buffer.
|
||||
*
|
||||
* This will also write the terminator byte.
|
||||
*
|
||||
* @param buffer the byte buffer to write to.
|
||||
* @param origin the offset to write from.
|
||||
* @param word the string to write.
|
||||
* @return the size written, in bytes.
|
||||
*/
|
||||
static int writeString(final byte[] buffer, final int origin,
|
||||
final String word) {
|
||||
final int length = word.length();
|
||||
int index = origin;
|
||||
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
|
||||
final int codePoint = word.codePointAt(i);
|
||||
if (1 == getCharSize(codePoint)) {
|
||||
buffer[index++] = (byte)codePoint;
|
||||
} else {
|
||||
buffer[index++] = (byte)(0xFF & (codePoint >> 16));
|
||||
buffer[index++] = (byte)(0xFF & (codePoint >> 8));
|
||||
buffer[index++] = (byte)(0xFF & codePoint);
|
||||
}
|
||||
}
|
||||
buffer[index++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR;
|
||||
return index - origin;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a string with our character format to a ByteArrayOutputStream.
|
||||
*
|
||||
* This will also write the terminator byte.
|
||||
*
|
||||
* @param buffer the ByteArrayOutputStream to write to.
|
||||
* @param word the string to write.
|
||||
*/
|
||||
static void writeString(final ByteArrayOutputStream buffer, final String word) {
|
||||
final int length = word.length();
|
||||
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
|
||||
final int codePoint = word.codePointAt(i);
|
||||
if (1 == getCharSize(codePoint)) {
|
||||
buffer.write((byte) codePoint);
|
||||
} else {
|
||||
buffer.write((byte) (0xFF & (codePoint >> 16)));
|
||||
buffer.write((byte) (0xFF & (codePoint >> 8)));
|
||||
buffer.write((byte) (0xFF & codePoint));
|
||||
}
|
||||
}
|
||||
buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a string from a buffer. This is the converse of the above method.
|
||||
*/
|
||||
static String readString(final FusionDictionaryBufferInterface buffer) {
|
||||
final StringBuilder s = new StringBuilder();
|
||||
int character = readChar(buffer);
|
||||
while (character != FormatSpec.INVALID_CHARACTER) {
|
||||
s.appendCodePoint(character);
|
||||
character = readChar(buffer);
|
||||
}
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a character from the buffer.
|
||||
*
|
||||
* This follows the character format documented earlier in this source file.
|
||||
*
|
||||
* @param buffer the buffer, positioned over an encoded character.
|
||||
* @return the character code.
|
||||
*/
|
||||
static int readChar(final FusionDictionaryBufferInterface buffer) {
|
||||
int character = buffer.readUnsignedByte();
|
||||
if (!fitsOnOneByte(character)) {
|
||||
if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == character) {
|
||||
return FormatSpec.INVALID_CHARACTER;
|
||||
}
|
||||
character <<= 16;
|
||||
character += buffer.readUnsignedShort();
|
||||
}
|
||||
return character;
|
||||
}
|
||||
}
|
||||
|
||||
// Input methods: Read a binary dictionary to memory.
|
||||
// readDictionaryBinary is the public entry point for them.
|
||||
|
||||
static int readChildrenAddress(final FusionDictionaryBufferInterface buffer,
|
||||
final int optionFlags, final FormatOptions options) {
|
||||
if (options.mSupportsDynamicUpdate) {
|
||||
final int address = buffer.readUnsignedInt24();
|
||||
if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
|
||||
if ((address & FormatSpec.MSB24) != 0) {
|
||||
return -(address & FormatSpec.SINT24_MAX);
|
||||
} else {
|
||||
return address;
|
||||
}
|
||||
}
|
||||
int address;
|
||||
switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) {
|
||||
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
|
||||
return buffer.readUnsignedByte();
|
||||
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
|
||||
return buffer.readUnsignedShort();
|
||||
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
|
||||
return buffer.readUnsignedInt24();
|
||||
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
|
||||
default:
|
||||
return FormatSpec.NO_CHILDREN_ADDRESS;
|
||||
}
|
||||
}
|
||||
|
||||
static int readParentAddress(final FusionDictionaryBufferInterface buffer,
|
||||
final FormatOptions formatOptions) {
|
||||
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
||||
final int parentAddress = buffer.readUnsignedInt24();
|
||||
final int sign = ((parentAddress & FormatSpec.MSB24) != 0) ? -1 : 1;
|
||||
return sign * (parentAddress & FormatSpec.SINT24_MAX);
|
||||
} else {
|
||||
return FormatSpec.NO_PARENT_ADDRESS;
|
||||
}
|
||||
}
|
||||
|
||||
private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH];
|
||||
public static CharGroupInfo readCharGroup(final FusionDictionaryBufferInterface buffer,
|
||||
final int originalGroupAddress, final FormatOptions options) {
|
||||
int addressPointer = originalGroupAddress;
|
||||
final int flags = buffer.readUnsignedByte();
|
||||
++addressPointer;
|
||||
|
||||
final int parentAddress = readParentAddress(buffer, options);
|
||||
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
|
||||
addressPointer += 3;
|
||||
}
|
||||
|
||||
final int characters[];
|
||||
if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
|
||||
int index = 0;
|
||||
int character = CharEncoding.readChar(buffer);
|
||||
addressPointer += CharEncoding.getCharSize(character);
|
||||
while (-1 != character) {
|
||||
// FusionDictionary is making sure that the length of the word is smaller than
|
||||
// MAX_WORD_LENGTH.
|
||||
// So we'll never write past the end of CHARACTER_BUFFER.
|
||||
CHARACTER_BUFFER[index++] = character;
|
||||
character = CharEncoding.readChar(buffer);
|
||||
addressPointer += CharEncoding.getCharSize(character);
|
||||
}
|
||||
characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index);
|
||||
} else {
|
||||
final int character = CharEncoding.readChar(buffer);
|
||||
addressPointer += CharEncoding.getCharSize(character);
|
||||
characters = new int[] { character };
|
||||
}
|
||||
final int frequency;
|
||||
if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
|
||||
++addressPointer;
|
||||
frequency = buffer.readUnsignedByte();
|
||||
} else {
|
||||
frequency = CharGroup.NOT_A_TERMINAL;
|
||||
}
|
||||
int childrenAddress = readChildrenAddress(buffer, flags, options);
|
||||
if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||
childrenAddress += addressPointer;
|
||||
}
|
||||
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
|
||||
ArrayList<WeightedString> shortcutTargets = null;
|
||||
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
|
||||
final int pointerBefore = buffer.position();
|
||||
shortcutTargets = new ArrayList<WeightedString>();
|
||||
buffer.readUnsignedShort(); // Skip the size
|
||||
while (true) {
|
||||
final int targetFlags = buffer.readUnsignedByte();
|
||||
final String word = CharEncoding.readString(buffer);
|
||||
shortcutTargets.add(new WeightedString(word,
|
||||
targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY));
|
||||
if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
|
||||
}
|
||||
addressPointer += buffer.position() - pointerBefore;
|
||||
}
|
||||
ArrayList<PendingAttribute> bigrams = null;
|
||||
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
|
||||
bigrams = new ArrayList<PendingAttribute>();
|
||||
int bigramCount = 0;
|
||||
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
||||
final int bigramFlags = buffer.readUnsignedByte();
|
||||
++addressPointer;
|
||||
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE)
|
||||
? 1 : -1;
|
||||
int bigramAddress = addressPointer;
|
||||
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
|
||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
|
||||
bigramAddress += sign * buffer.readUnsignedByte();
|
||||
addressPointer += 1;
|
||||
break;
|
||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
|
||||
bigramAddress += sign * buffer.readUnsignedShort();
|
||||
addressPointer += 2;
|
||||
break;
|
||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
|
||||
final int offset = (buffer.readUnsignedByte() << 16)
|
||||
+ buffer.readUnsignedShort();
|
||||
bigramAddress += sign * offset;
|
||||
addressPointer += 3;
|
||||
break;
|
||||
default:
|
||||
throw new RuntimeException("Has bigrams with no address");
|
||||
}
|
||||
bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY,
|
||||
bigramAddress));
|
||||
if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
|
||||
}
|
||||
if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
||||
MakedictLog.d("too many bigrams in a group.");
|
||||
}
|
||||
}
|
||||
return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
|
||||
parentAddress, childrenAddress, shortcutTargets, bigrams);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads and returns the char group count out of a buffer and advances the pointer.
|
||||
*/
|
||||
public static int readCharGroupCount(final FusionDictionaryBufferInterface buffer) {
|
||||
final int msb = buffer.readUnsignedByte();
|
||||
if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= msb) {
|
||||
return msb;
|
||||
} else {
|
||||
return ((FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & msb) << 8)
|
||||
+ buffer.readUnsignedByte();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds, as a string, the word at the address passed as an argument.
|
||||
* Creates DictionaryBuffer using a ByteBuffer
|
||||
*
|
||||
* @param buffer the buffer to read from.
|
||||
* @param headerSize the size of the header.
|
||||
* @param address the address to seek.
|
||||
* @param formatOptions file format options.
|
||||
* @return the word with its frequency, as a weighted string.
|
||||
* This class uses less memory than DictionaryBufferFromByteArrayFactory,
|
||||
* but doesn't perform as fast.
|
||||
* When operating on a big dictionary, this class is preferred.
|
||||
*/
|
||||
/* package for tests */ static WeightedString getWordAtAddress(
|
||||
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
|
||||
final FormatOptions formatOptions) {
|
||||
final WeightedString result;
|
||||
final int originalPointer = buffer.position();
|
||||
buffer.position(address);
|
||||
|
||||
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
||||
result = getWordAtAddressWithParentAddress(buffer, headerSize, address, formatOptions);
|
||||
} else {
|
||||
result = getWordAtAddressWithoutParentAddress(buffer, headerSize, address,
|
||||
formatOptions);
|
||||
}
|
||||
|
||||
buffer.position(originalPointer);
|
||||
return result;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private static WeightedString getWordAtAddressWithParentAddress(
|
||||
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
|
||||
final FormatOptions options) {
|
||||
int currentAddress = address;
|
||||
int frequency = Integer.MIN_VALUE;
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
|
||||
for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
|
||||
CharGroupInfo currentInfo;
|
||||
int loopCounter = 0;
|
||||
do {
|
||||
buffer.position(currentAddress + headerSize);
|
||||
currentInfo = readCharGroup(buffer, currentAddress, options);
|
||||
if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) {
|
||||
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
||||
}
|
||||
if (DBG && loopCounter++ > MAX_JUMPS) {
|
||||
MakedictLog.d("Too many jumps - probably a bug");
|
||||
}
|
||||
} while (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options));
|
||||
if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency;
|
||||
builder.insert(0,
|
||||
new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length));
|
||||
if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
|
||||
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
||||
}
|
||||
return new WeightedString(builder.toString(), frequency);
|
||||
}
|
||||
|
||||
private static WeightedString getWordAtAddressWithoutParentAddress(
|
||||
final FusionDictionaryBufferInterface buffer, final int headerSize, final int address,
|
||||
final FormatOptions options) {
|
||||
buffer.position(headerSize);
|
||||
final int count = readCharGroupCount(buffer);
|
||||
int groupOffset = BinaryDictIOUtils.getGroupCountSize(count);
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
WeightedString result = null;
|
||||
|
||||
CharGroupInfo last = null;
|
||||
for (int i = count - 1; i >= 0; --i) {
|
||||
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
|
||||
groupOffset = info.mEndAddress;
|
||||
if (info.mOriginalAddress == address) {
|
||||
builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
|
||||
result = new WeightedString(builder.toString(), info.mFrequency);
|
||||
break; // and return
|
||||
}
|
||||
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
||||
if (info.mChildrenAddress > address) {
|
||||
if (null == last) continue;
|
||||
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
|
||||
buffer.position(last.mChildrenAddress + headerSize);
|
||||
i = readCharGroupCount(buffer);
|
||||
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
|
||||
last = null;
|
||||
continue;
|
||||
}
|
||||
last = info;
|
||||
}
|
||||
if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
|
||||
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
|
||||
buffer.position(last.mChildrenAddress + headerSize);
|
||||
i = readCharGroupCount(buffer);
|
||||
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
|
||||
last = null;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a single node array from a buffer.
|
||||
*
|
||||
* This method reads the file at the current position. A node array is fully expected to start
|
||||
* at the current position.
|
||||
* This will recursively read other node arrays into the structure, populating the reverse
|
||||
* maps on the fly and using them to keep track of already read nodes.
|
||||
*
|
||||
* @param buffer the buffer, correctly positioned at the start of a node array.
|
||||
* @param headerSize the size, in bytes, of the file header.
|
||||
* @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
|
||||
* @param reverseGroupMap a mapping from addresses to already read character groups.
|
||||
* @param options file format options.
|
||||
* @return the read node array with all its children already read.
|
||||
*/
|
||||
private static PtNodeArray readNodeArray(final FusionDictionaryBufferInterface buffer,
|
||||
final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
|
||||
final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
|
||||
throws IOException {
|
||||
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
|
||||
final int nodeArrayOrigin = buffer.position() - headerSize;
|
||||
|
||||
do { // Scan the linked-list node.
|
||||
final int nodeArrayHeadPosition = buffer.position() - headerSize;
|
||||
final int count = readCharGroupCount(buffer);
|
||||
int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
|
||||
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
|
||||
CharGroupInfo info = readCharGroup(buffer, groupOffset, options);
|
||||
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
|
||||
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
|
||||
ArrayList<WeightedString> bigrams = null;
|
||||
if (null != info.mBigrams) {
|
||||
bigrams = new ArrayList<WeightedString>();
|
||||
for (PendingAttribute bigram : info.mBigrams) {
|
||||
final WeightedString word = getWordAtAddress(
|
||||
buffer, headerSize, bigram.mAddress, options);
|
||||
final int reconstructedFrequency =
|
||||
reconstructBigramFrequency(word.mFrequency, bigram.mFrequency);
|
||||
bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
|
||||
}
|
||||
}
|
||||
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
||||
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
|
||||
if (null == children) {
|
||||
final int currentPosition = buffer.position();
|
||||
buffer.position(info.mChildrenAddress + headerSize);
|
||||
children = readNodeArray(
|
||||
buffer, headerSize, reverseNodeArrayMap, reverseGroupMap, options);
|
||||
buffer.position(currentPosition);
|
||||
}
|
||||
nodeArrayContents.add(
|
||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
||||
info.mFrequency,
|
||||
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
||||
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
|
||||
} else {
|
||||
nodeArrayContents.add(
|
||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
||||
info.mFrequency,
|
||||
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
||||
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
|
||||
}
|
||||
groupOffset = info.mEndAddress;
|
||||
}
|
||||
|
||||
// reach the end of the array.
|
||||
if (options.mSupportsDynamicUpdate) {
|
||||
final int nextAddress = buffer.readUnsignedInt24();
|
||||
if (nextAddress >= 0 && nextAddress < buffer.limit()) {
|
||||
buffer.position(nextAddress);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (options.mSupportsDynamicUpdate &&
|
||||
buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||
|
||||
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
|
||||
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
|
||||
nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
|
||||
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
|
||||
return nodeArray;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to get the binary format version from the header.
|
||||
* @throws IOException
|
||||
*/
|
||||
private static int getFormatVersion(final FusionDictionaryBufferInterface buffer)
|
||||
throws IOException {
|
||||
final int magic = buffer.readInt();
|
||||
if (FormatSpec.MAGIC_NUMBER == magic) return buffer.readUnsignedShort();
|
||||
return FormatSpec.NOT_A_VERSION_NUMBER;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to get and validate the binary format version.
|
||||
* @throws UnsupportedFormatException
|
||||
* @throws IOException
|
||||
*/
|
||||
static int checkFormatVersion(final FusionDictionaryBufferInterface buffer)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final int version = getFormatVersion(buffer);
|
||||
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|
||||
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
|
||||
throw new UnsupportedFormatException("This file has version " + version
|
||||
+ ", but this implementation does not support versions above "
|
||||
+ FormatSpec.MAXIMUM_SUPPORTED_VERSION);
|
||||
}
|
||||
return version;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a header from a buffer.
|
||||
* @param headerReader the header reader
|
||||
* @throws IOException
|
||||
* @throws UnsupportedFormatException
|
||||
*/
|
||||
public static FileHeader readHeader(final HeaderReaderInterface headerReader)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final int version = headerReader.readVersion();
|
||||
final int optionsFlags = headerReader.readOptionFlags();
|
||||
|
||||
final int headerSize = headerReader.readHeaderSize();
|
||||
|
||||
if (headerSize < 0) {
|
||||
throw new UnsupportedFormatException("header size can't be negative.");
|
||||
}
|
||||
|
||||
final HashMap<String, String> attributes = headerReader.readAttributes(headerSize);
|
||||
|
||||
final FileHeader header = new FileHeader(headerSize,
|
||||
new FusionDictionary.DictionaryOptions(attributes,
|
||||
0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
|
||||
0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
|
||||
new FormatOptions(version,
|
||||
0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads options from a buffer and populates a map with their contents.
|
||||
*
|
||||
* The buffer is read at the current position, so the caller must take care the pointer
|
||||
* is in the right place before calling this.
|
||||
*/
|
||||
public static void populateOptions(final FusionDictionaryBufferInterface buffer,
|
||||
final int headerSize, final HashMap<String, String> options) {
|
||||
while (buffer.position() < headerSize) {
|
||||
final String key = CharEncoding.readString(buffer);
|
||||
final String value = CharEncoding.readString(buffer);
|
||||
options.put(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a buffer and returns the memory representation of the dictionary.
|
||||
*
|
||||
* This high-level method takes a buffer and reads its contents, populating a
|
||||
* FusionDictionary structure. The optional dict argument is an existing dictionary to
|
||||
* which words from the buffer should be added. If it is null, a new dictionary is created.
|
||||
*
|
||||
* @param reader the reader.
|
||||
* @param dict an optional dictionary to add words to, or null.
|
||||
* @return the created (or merged) dictionary.
|
||||
*/
|
||||
@UsedForTesting
|
||||
public static FusionDictionary readDictionaryBinary(final BinaryDictReader reader,
|
||||
final FusionDictionary dict) throws FileNotFoundException, IOException,
|
||||
UnsupportedFormatException {
|
||||
|
||||
// If the buffer has not been opened yet, open it using a ByteBuffer-backed factory.
|
||||
if (reader.getBuffer() == null) reader.openBuffer(
|
||||
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
||||
if (reader.getBuffer() == null) {
|
||||
MakedictLog.e("Cannot open the buffer");
|
||||
}
|
||||
|
||||
// Read header
|
||||
final FileHeader fileHeader = readHeader(reader);
|
||||
|
||||
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
|
||||
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
||||
final PtNodeArray root = readNodeArray(reader.getBuffer(), fileHeader.mHeaderSize,
|
||||
reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions);
|
||||
|
||||
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
|
||||
if (null != dict) {
|
||||
for (final Word w : dict) {
|
||||
if (w.mIsBlacklistEntry) {
|
||||
newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
|
||||
} else {
|
||||
newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
|
||||
}
|
||||
}
|
||||
for (final Word w : dict) {
|
||||
// By construction a binary dictionary may not have bigrams pointing to
|
||||
// words that are not also registered as unigrams so we don't have to avoid
|
||||
// them explicitly here.
|
||||
for (final WeightedString bigram : w.mBigrams) {
|
||||
newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return newDict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to pass a file name instead of a File object to isBinaryDictionary.
|
||||
*/
|
||||
public static boolean isBinaryDictionary(final String filename) {
|
||||
final File file = new File(filename);
|
||||
return isBinaryDictionary(file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Basic test to find out whether the file is a binary dictionary or not.
|
||||
*
|
||||
* Concretely this only tests the magic number.
|
||||
*
|
||||
* @param file The file to test.
|
||||
* @return true if it's a binary dictionary, false otherwise
|
||||
*/
|
||||
public static boolean isBinaryDictionary(final File file) {
|
||||
FileInputStream inStream = null;
|
||||
try {
|
||||
inStream = new FileInputStream(file);
|
||||
final ByteBuffer buffer = inStream.getChannel().map(
|
||||
FileChannel.MapMode.READ_ONLY, 0, file.length());
|
||||
final int version = getFormatVersion(new ByteBufferWrapper(buffer));
|
||||
return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION
|
||||
&& version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION);
|
||||
} catch (FileNotFoundException e) {
|
||||
return false;
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
} finally {
|
||||
if (inStream != null) {
|
||||
try {
|
||||
public static final class DictionaryBufferFromReadOnlyByteBufferFactory
|
||||
implements DictionaryBufferFactory {
|
||||
@Override
|
||||
public DictBuffer getDictionaryBuffer(final File file)
|
||||
throws FileNotFoundException, IOException {
|
||||
FileInputStream inStream = null;
|
||||
ByteBuffer buffer = null;
|
||||
try {
|
||||
inStream = new FileInputStream(file);
|
||||
buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY,
|
||||
0, file.length());
|
||||
} finally {
|
||||
if (inStream != null) {
|
||||
inStream.close();
|
||||
}
|
||||
}
|
||||
if (buffer != null) {
|
||||
return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates DictionaryBuffer using a byte array
|
||||
*
|
||||
* This class performs faster than other classes, but consumes more memory.
|
||||
* When operating on a small dictionary, this class is preferred.
|
||||
*/
|
||||
public static final class DictionaryBufferFromByteArrayFactory
|
||||
implements DictionaryBufferFactory {
|
||||
@Override
|
||||
public DictBuffer getDictionaryBuffer(final File file)
|
||||
throws FileNotFoundException, IOException {
|
||||
FileInputStream inStream = null;
|
||||
try {
|
||||
inStream = new FileInputStream(file);
|
||||
final byte[] array = new byte[(int) file.length()];
|
||||
inStream.read(array);
|
||||
return new ByteArrayDictBuffer(array);
|
||||
} finally {
|
||||
if (inStream != null) {
|
||||
inStream.close();
|
||||
} catch (IOException e) {
|
||||
// do nothing
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate bigram frequency from compressed value
|
||||
* Creates DictionaryBuffer using a writable ByteBuffer and a RandomAccessFile.
|
||||
*
|
||||
* @param unigramFrequency
|
||||
* @param bigramFrequency compressed frequency
|
||||
* @return approximate bigram frequency
|
||||
* This class doesn't perform as fast as other classes,
|
||||
* but this class is the only option available for destructive operations (insert or delete)
|
||||
* on a dictionary.
|
||||
*/
|
||||
public static int reconstructBigramFrequency(final int unigramFrequency,
|
||||
final int bigramFrequency) {
|
||||
final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)
|
||||
/ (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
|
||||
final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f);
|
||||
return (int)resultFreqFloat;
|
||||
@UsedForTesting
|
||||
public static final class DictionaryBufferFromWritableByteBufferFactory
|
||||
implements DictionaryBufferFactory {
|
||||
@Override
|
||||
public DictBuffer getDictionaryBuffer(final File file)
|
||||
throws FileNotFoundException, IOException {
|
||||
RandomAccessFile raFile = null;
|
||||
ByteBuffer buffer = null;
|
||||
try {
|
||||
raFile = new RandomAccessFile(file, "rw");
|
||||
buffer = raFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, file.length());
|
||||
} finally {
|
||||
if (raFile != null) {
|
||||
raFile.close();
|
||||
}
|
||||
}
|
||||
if (buffer != null) {
|
||||
return new BinaryDictDecoderUtils.ByteBufferDictBuffer(buffer);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private final File mDictionaryBinaryFile;
|
||||
private DictBuffer mDictBuffer;
|
||||
|
||||
/**
 * Creates a decoder for the given dictionary file.
 *
 * No buffer is opened here: the buffer stays null until openDictBuffer is called.
 *
 * @param file the binary dictionary file to decode.
 */
public BinaryDictDecoder(final File file) {
    this.mDictionaryBinaryFile = file;
    this.mDictBuffer = null;
}
|
||||
|
||||
public void openDictBuffer(final DictionaryBufferFactory factory)
|
||||
throws FileNotFoundException, IOException {
|
||||
mDictBuffer = factory.getDictionaryBuffer(mDictionaryBinaryFile);
|
||||
}
|
||||
|
||||
public DictBuffer getDictBuffer() {
|
||||
return mDictBuffer;
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
public DictBuffer openAndGetDictBuffer(
|
||||
final DictionaryBufferFactory factory)
|
||||
throws FileNotFoundException, IOException {
|
||||
openDictBuffer(factory);
|
||||
return getDictBuffer();
|
||||
}
|
||||
|
||||
// The implementation of HeaderReader
|
||||
@Override
|
||||
public int readVersion() throws IOException, UnsupportedFormatException {
|
||||
return BinaryDictDecoderUtils.checkFormatVersion(mDictBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readOptionFlags() {
|
||||
return mDictBuffer.readUnsignedShort();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readHeaderSize() {
|
||||
return mDictBuffer.readInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashMap<String, String> readAttributes(final int headerSize) {
|
||||
final HashMap<String, String> attributes = new HashMap<String, String>();
|
||||
while (mDictBuffer.position() < headerSize) {
|
||||
// We can avoid infinite loop here since mFusionDictonary.position() is always increased
|
||||
// by calling CharEncoding.readString.
|
||||
final String key = CharEncoding.readString(mDictBuffer);
|
||||
final String value = CharEncoding.readString(mDictBuffer);
|
||||
attributes.put(key, value);
|
||||
}
|
||||
mDictBuffer.position(headerSize);
|
||||
return attributes;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -0,0 +1,777 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.makedict.decoder.HeaderReader;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
import java.util.TreeMap;
|
||||
|
||||
/**
|
||||
* Decodes binary files for a FusionDictionary.
|
||||
*
|
||||
* All the methods in this class are static.
|
||||
*
|
||||
* TODO: Remove calls from classes except BinaryDictDecoder
|
||||
* TODO: Move this file to makedict/internal.
|
||||
*/
|
||||
public final class BinaryDictDecoderUtils {
|
||||
|
||||
private static final boolean DBG = MakedictLog.DBG;
|
||||
|
||||
/** Private so that this all-static utility class cannot be instantiated from outside. */
private BinaryDictDecoderUtils() {
    // Intentionally empty.
}
|
||||
|
||||
private static final int MAX_JUMPS = 12;
|
||||
|
||||
@UsedForTesting
|
||||
public interface DictBuffer {
|
||||
public int readUnsignedByte();
|
||||
public int readUnsignedShort();
|
||||
public int readUnsignedInt24();
|
||||
public int readInt();
|
||||
public int position();
|
||||
public void position(int newPosition);
|
||||
public void put(final byte b);
|
||||
public int limit();
|
||||
@UsedForTesting
|
||||
public int capacity();
|
||||
}
|
||||
|
||||
public static final class ByteBufferDictBuffer implements DictBuffer {
|
||||
private ByteBuffer mBuffer;
|
||||
|
||||
public ByteBufferDictBuffer(final ByteBuffer buffer) {
|
||||
mBuffer = buffer;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readUnsignedByte() {
|
||||
return mBuffer.get() & 0xFF;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readUnsignedShort() {
|
||||
return mBuffer.getShort() & 0xFFFF;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readUnsignedInt24() {
|
||||
final int retval = readUnsignedByte();
|
||||
return (retval << 16) + readUnsignedShort();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readInt() {
|
||||
return mBuffer.getInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int position() {
|
||||
return mBuffer.position();
|
||||
}
|
||||
|
||||
@Override
|
||||
public void position(int newPos) {
|
||||
mBuffer.position(newPos);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void put(final byte b) {
|
||||
mBuffer.put(b);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int limit() {
|
||||
return mBuffer.limit();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int capacity() {
|
||||
return mBuffer.capacity();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* A class grouping utility function for our specific character encoding.
|
||||
*/
|
||||
static final class CharEncoding {
|
||||
private static final int MINIMAL_ONE_BYTE_CHARACTER_VALUE = 0x20;
|
||||
private static final int MAXIMAL_ONE_BYTE_CHARACTER_VALUE = 0xFF;
|
||||
|
||||
/**
|
||||
* Helper method to find out whether this code fits on one byte
|
||||
*/
|
||||
private static boolean fitsOnOneByte(final int character) {
|
||||
return character >= MINIMAL_ONE_BYTE_CHARACTER_VALUE
|
||||
&& character <= MAXIMAL_ONE_BYTE_CHARACTER_VALUE;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the size of a character given its character code.
|
||||
*
|
||||
* Char format is:
|
||||
* 1 byte = bbbbbbbb match
|
||||
* case 000xxxxx: xxxxx << 16 + next byte << 8 + next byte
|
||||
* else: if 00011111 (= 0x1F) : this is the terminator. This is a relevant choice because
|
||||
* unicode code points range from 0 to 0x10FFFF, so any 3-byte value starting with
|
||||
* 00011111 would be outside unicode.
|
||||
* else: iso-latin-1 code
|
||||
* This allows for the whole unicode range to be encoded, including chars outside of
|
||||
* the BMP. Also everything in the iso-latin-1 charset is only 1 byte, except control
|
||||
* characters which should never happen anyway (and still work, but take 3 bytes).
|
||||
*
|
||||
* @param character the character code.
|
||||
* @return the size in binary encoded-form, either 1 or 3 bytes.
|
||||
*/
|
||||
static int getCharSize(final int character) {
|
||||
// See char encoding in FusionDictionary.java
|
||||
if (fitsOnOneByte(character)) return 1;
|
||||
if (FormatSpec.INVALID_CHARACTER == character) return 1;
|
||||
return 3;
|
||||
}
|
||||
|
||||
/**
|
||||
* Compute the byte size of a character array.
|
||||
*/
|
||||
static int getCharArraySize(final int[] chars) {
|
||||
int size = 0;
|
||||
for (int character : chars) size += getCharSize(character);
|
||||
return size;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a char array to a byte buffer.
|
||||
*
|
||||
* @param codePoints the code point array to write.
|
||||
* @param buffer the byte buffer to write to.
|
||||
* @param index the index in buffer to write the character array to.
|
||||
* @return the index after the last character.
|
||||
*/
|
||||
static int writeCharArray(final int[] codePoints, final byte[] buffer, int index) {
|
||||
for (int codePoint : codePoints) {
|
||||
if (1 == getCharSize(codePoint)) {
|
||||
buffer[index++] = (byte)codePoint;
|
||||
} else {
|
||||
buffer[index++] = (byte)(0xFF & (codePoint >> 16));
|
||||
buffer[index++] = (byte)(0xFF & (codePoint >> 8));
|
||||
buffer[index++] = (byte)(0xFF & codePoint);
|
||||
}
|
||||
}
|
||||
return index;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a string with our character format to a byte buffer.
|
||||
*
|
||||
* This will also write the terminator byte.
|
||||
*
|
||||
* @param buffer the byte buffer to write to.
|
||||
* @param origin the offset to write from.
|
||||
* @param word the string to write.
|
||||
* @return the size written, in bytes.
|
||||
*/
|
||||
static int writeString(final byte[] buffer, final int origin,
|
||||
final String word) {
|
||||
final int length = word.length();
|
||||
int index = origin;
|
||||
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
|
||||
final int codePoint = word.codePointAt(i);
|
||||
if (1 == getCharSize(codePoint)) {
|
||||
buffer[index++] = (byte)codePoint;
|
||||
} else {
|
||||
buffer[index++] = (byte)(0xFF & (codePoint >> 16));
|
||||
buffer[index++] = (byte)(0xFF & (codePoint >> 8));
|
||||
buffer[index++] = (byte)(0xFF & codePoint);
|
||||
}
|
||||
}
|
||||
buffer[index++] = FormatSpec.GROUP_CHARACTERS_TERMINATOR;
|
||||
return index - origin;
|
||||
}
|
||||
|
||||
/**
|
||||
* Writes a string with our character format to a ByteArrayOutputStream.
|
||||
*
|
||||
* This will also write the terminator byte.
|
||||
*
|
||||
* @param buffer the ByteArrayOutputStream to write to.
|
||||
* @param word the string to write.
|
||||
*/
|
||||
static void writeString(final ByteArrayOutputStream buffer, final String word) {
|
||||
final int length = word.length();
|
||||
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
|
||||
final int codePoint = word.codePointAt(i);
|
||||
if (1 == getCharSize(codePoint)) {
|
||||
buffer.write((byte) codePoint);
|
||||
} else {
|
||||
buffer.write((byte) (0xFF & (codePoint >> 16)));
|
||||
buffer.write((byte) (0xFF & (codePoint >> 8)));
|
||||
buffer.write((byte) (0xFF & codePoint));
|
||||
}
|
||||
}
|
||||
buffer.write(FormatSpec.GROUP_CHARACTERS_TERMINATOR);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a string from a DictBuffer. This is the converse of the above method.
|
||||
*/
|
||||
static String readString(final DictBuffer dictBuffer) {
|
||||
final StringBuilder s = new StringBuilder();
|
||||
int character = readChar(dictBuffer);
|
||||
while (character != FormatSpec.INVALID_CHARACTER) {
|
||||
s.appendCodePoint(character);
|
||||
character = readChar(dictBuffer);
|
||||
}
|
||||
return s.toString();
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a character from the buffer.
|
||||
*
|
||||
* This follows the character format documented earlier in this source file.
|
||||
*
|
||||
* @param dictBuffer the buffer, positioned over an encoded character.
|
||||
* @return the character code.
|
||||
*/
|
||||
static int readChar(final DictBuffer dictBuffer) {
|
||||
int character = dictBuffer.readUnsignedByte();
|
||||
if (!fitsOnOneByte(character)) {
|
||||
if (FormatSpec.GROUP_CHARACTERS_TERMINATOR == character) {
|
||||
return FormatSpec.INVALID_CHARACTER;
|
||||
}
|
||||
character <<= 16;
|
||||
character += dictBuffer.readUnsignedShort();
|
||||
}
|
||||
return character;
|
||||
}
|
||||
}
|
||||
|
||||
// Input methods: Read a binary dictionary to memory.
|
||||
// readDictionaryBinary is the public entry point for them.
|
||||
|
||||
static int readChildrenAddress(final DictBuffer dictBuffer,
|
||||
final int optionFlags, final FormatOptions options) {
|
||||
if (options.mSupportsDynamicUpdate) {
|
||||
final int address = dictBuffer.readUnsignedInt24();
|
||||
if (address == 0) return FormatSpec.NO_CHILDREN_ADDRESS;
|
||||
if ((address & FormatSpec.MSB24) != 0) {
|
||||
return -(address & FormatSpec.SINT24_MAX);
|
||||
} else {
|
||||
return address;
|
||||
}
|
||||
}
|
||||
int address;
|
||||
switch (optionFlags & FormatSpec.MASK_GROUP_ADDRESS_TYPE) {
|
||||
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_ONEBYTE:
|
||||
return dictBuffer.readUnsignedByte();
|
||||
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_TWOBYTES:
|
||||
return dictBuffer.readUnsignedShort();
|
||||
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_THREEBYTES:
|
||||
return dictBuffer.readUnsignedInt24();
|
||||
case FormatSpec.FLAG_GROUP_ADDRESS_TYPE_NOADDRESS:
|
||||
default:
|
||||
return FormatSpec.NO_CHILDREN_ADDRESS;
|
||||
}
|
||||
}
|
||||
|
||||
static int readParentAddress(final DictBuffer dictBuffer,
|
||||
final FormatOptions formatOptions) {
|
||||
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
||||
final int parentAddress = dictBuffer.readUnsignedInt24();
|
||||
final int sign = ((parentAddress & FormatSpec.MSB24) != 0) ? -1 : 1;
|
||||
return sign * (parentAddress & FormatSpec.SINT24_MAX);
|
||||
} else {
|
||||
return FormatSpec.NO_PARENT_ADDRESS;
|
||||
}
|
||||
}
|
||||
|
||||
private static final int[] CHARACTER_BUFFER = new int[FormatSpec.MAX_WORD_LENGTH];
|
||||
public static CharGroupInfo readCharGroup(final DictBuffer dictBuffer,
|
||||
final int originalGroupAddress, final FormatOptions options) {
|
||||
int addressPointer = originalGroupAddress;
|
||||
final int flags = dictBuffer.readUnsignedByte();
|
||||
++addressPointer;
|
||||
|
||||
final int parentAddress = readParentAddress(dictBuffer, options);
|
||||
if (BinaryDictIOUtils.supportsDynamicUpdate(options)) {
|
||||
addressPointer += 3;
|
||||
}
|
||||
|
||||
final int characters[];
|
||||
if (0 != (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS)) {
|
||||
int index = 0;
|
||||
int character = CharEncoding.readChar(dictBuffer);
|
||||
addressPointer += CharEncoding.getCharSize(character);
|
||||
while (-1 != character) {
|
||||
// FusionDictionary is making sure that the length of the word is smaller than
|
||||
// MAX_WORD_LENGTH.
|
||||
// So we'll never write past the end of CHARACTER_BUFFER.
|
||||
CHARACTER_BUFFER[index++] = character;
|
||||
character = CharEncoding.readChar(dictBuffer);
|
||||
addressPointer += CharEncoding.getCharSize(character);
|
||||
}
|
||||
characters = Arrays.copyOfRange(CHARACTER_BUFFER, 0, index);
|
||||
} else {
|
||||
final int character = CharEncoding.readChar(dictBuffer);
|
||||
addressPointer += CharEncoding.getCharSize(character);
|
||||
characters = new int[] { character };
|
||||
}
|
||||
final int frequency;
|
||||
if (0 != (FormatSpec.FLAG_IS_TERMINAL & flags)) {
|
||||
++addressPointer;
|
||||
frequency = dictBuffer.readUnsignedByte();
|
||||
} else {
|
||||
frequency = CharGroup.NOT_A_TERMINAL;
|
||||
}
|
||||
int childrenAddress = readChildrenAddress(dictBuffer, flags, options);
|
||||
if (childrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||
childrenAddress += addressPointer;
|
||||
}
|
||||
addressPointer += BinaryDictIOUtils.getChildrenAddressSize(flags, options);
|
||||
ArrayList<WeightedString> shortcutTargets = null;
|
||||
if (0 != (flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS)) {
|
||||
final int pointerBefore = dictBuffer.position();
|
||||
shortcutTargets = new ArrayList<WeightedString>();
|
||||
dictBuffer.readUnsignedShort(); // Skip the size
|
||||
while (true) {
|
||||
final int targetFlags = dictBuffer.readUnsignedByte();
|
||||
final String word = CharEncoding.readString(dictBuffer);
|
||||
shortcutTargets.add(new WeightedString(word,
|
||||
targetFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY));
|
||||
if (0 == (targetFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
|
||||
}
|
||||
addressPointer += dictBuffer.position() - pointerBefore;
|
||||
}
|
||||
ArrayList<PendingAttribute> bigrams = null;
|
||||
if (0 != (flags & FormatSpec.FLAG_HAS_BIGRAMS)) {
|
||||
bigrams = new ArrayList<PendingAttribute>();
|
||||
int bigramCount = 0;
|
||||
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
||||
final int bigramFlags = dictBuffer.readUnsignedByte();
|
||||
++addressPointer;
|
||||
final int sign = 0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE)
|
||||
? 1 : -1;
|
||||
int bigramAddress = addressPointer;
|
||||
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
|
||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
|
||||
bigramAddress += sign * dictBuffer.readUnsignedByte();
|
||||
addressPointer += 1;
|
||||
break;
|
||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
|
||||
bigramAddress += sign * dictBuffer.readUnsignedShort();
|
||||
addressPointer += 2;
|
||||
break;
|
||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
|
||||
final int offset = (dictBuffer.readUnsignedByte() << 16)
|
||||
+ dictBuffer.readUnsignedShort();
|
||||
bigramAddress += sign * offset;
|
||||
addressPointer += 3;
|
||||
break;
|
||||
default:
|
||||
throw new RuntimeException("Has bigrams with no address");
|
||||
}
|
||||
bigrams.add(new PendingAttribute(bigramFlags & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY,
|
||||
bigramAddress));
|
||||
if (0 == (bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT)) break;
|
||||
}
|
||||
if (bigramCount >= FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
||||
MakedictLog.d("too many bigrams in a group.");
|
||||
}
|
||||
}
|
||||
return new CharGroupInfo(originalGroupAddress, addressPointer, flags, characters, frequency,
|
||||
parentAddress, childrenAddress, shortcutTargets, bigrams);
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads and returns the char group count out of a buffer and forwards the pointer.
|
||||
*/
|
||||
public static int readCharGroupCount(final DictBuffer dictBuffer) {
|
||||
final int msb = dictBuffer.readUnsignedByte();
|
||||
if (FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= msb) {
|
||||
return msb;
|
||||
} else {
|
||||
return ((FormatSpec.MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT & msb) << 8)
|
||||
+ dictBuffer.readUnsignedByte();
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Finds, as a string, the word at the address passed as an argument.
|
||||
*
|
||||
* @param dictBuffer the buffer to read from.
|
||||
* @param headerSize the size of the header.
|
||||
* @param address the address to seek.
|
||||
* @param formatOptions file format options.
|
||||
* @return the word with its frequency, as a weighted string.
|
||||
*/
|
||||
/* package for tests */ static WeightedString getWordAtAddress(
|
||||
final DictBuffer dictBuffer, final int headerSize, final int address,
|
||||
final FormatOptions formatOptions) {
|
||||
final WeightedString result;
|
||||
final int originalPointer = dictBuffer.position();
|
||||
dictBuffer.position(address);
|
||||
|
||||
if (BinaryDictIOUtils.supportsDynamicUpdate(formatOptions)) {
|
||||
result = getWordAtAddressWithParentAddress(dictBuffer, headerSize, address,
|
||||
formatOptions);
|
||||
} else {
|
||||
result = getWordAtAddressWithoutParentAddress(dictBuffer, headerSize, address,
|
||||
formatOptions);
|
||||
}
|
||||
|
||||
dictBuffer.position(originalPointer);
|
||||
return result;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private static WeightedString getWordAtAddressWithParentAddress(
|
||||
final DictBuffer dictBuffer, final int headerSize, final int address,
|
||||
final FormatOptions options) {
|
||||
int currentAddress = address;
|
||||
int frequency = Integer.MIN_VALUE;
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
// the length of the path from the root to the leaf is limited by MAX_WORD_LENGTH
|
||||
for (int count = 0; count < FormatSpec.MAX_WORD_LENGTH; ++count) {
|
||||
CharGroupInfo currentInfo;
|
||||
int loopCounter = 0;
|
||||
do {
|
||||
dictBuffer.position(currentAddress + headerSize);
|
||||
currentInfo = readCharGroup(dictBuffer, currentAddress, options);
|
||||
if (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options)) {
|
||||
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
||||
}
|
||||
if (DBG && loopCounter++ > MAX_JUMPS) {
|
||||
MakedictLog.d("Too many jumps - probably a bug");
|
||||
}
|
||||
} while (BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags, options));
|
||||
if (Integer.MIN_VALUE == frequency) frequency = currentInfo.mFrequency;
|
||||
builder.insert(0,
|
||||
new String(currentInfo.mCharacters, 0, currentInfo.mCharacters.length));
|
||||
if (currentInfo.mParentAddress == FormatSpec.NO_PARENT_ADDRESS) break;
|
||||
currentAddress = currentInfo.mParentAddress + currentInfo.mOriginalAddress;
|
||||
}
|
||||
return new WeightedString(builder.toString(), frequency);
|
||||
}
|
||||
|
||||
private static WeightedString getWordAtAddressWithoutParentAddress(
|
||||
final DictBuffer dictBuffer, final int headerSize, final int address,
|
||||
final FormatOptions options) {
|
||||
dictBuffer.position(headerSize);
|
||||
final int count = readCharGroupCount(dictBuffer);
|
||||
int groupOffset = BinaryDictIOUtils.getGroupCountSize(count);
|
||||
final StringBuilder builder = new StringBuilder();
|
||||
WeightedString result = null;
|
||||
|
||||
CharGroupInfo last = null;
|
||||
for (int i = count - 1; i >= 0; --i) {
|
||||
CharGroupInfo info = readCharGroup(dictBuffer, groupOffset, options);
|
||||
groupOffset = info.mEndAddress;
|
||||
if (info.mOriginalAddress == address) {
|
||||
builder.append(new String(info.mCharacters, 0, info.mCharacters.length));
|
||||
result = new WeightedString(builder.toString(), info.mFrequency);
|
||||
break; // and return
|
||||
}
|
||||
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
||||
if (info.mChildrenAddress > address) {
|
||||
if (null == last) continue;
|
||||
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
|
||||
dictBuffer.position(last.mChildrenAddress + headerSize);
|
||||
i = readCharGroupCount(dictBuffer);
|
||||
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
|
||||
last = null;
|
||||
continue;
|
||||
}
|
||||
last = info;
|
||||
}
|
||||
if (0 == i && BinaryDictIOUtils.hasChildrenAddress(last.mChildrenAddress)) {
|
||||
builder.append(new String(last.mCharacters, 0, last.mCharacters.length));
|
||||
dictBuffer.position(last.mChildrenAddress + headerSize);
|
||||
i = readCharGroupCount(dictBuffer);
|
||||
groupOffset = last.mChildrenAddress + BinaryDictIOUtils.getGroupCountSize(i);
|
||||
last = null;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a single node array from a buffer.
|
||||
*
|
||||
* This methods reads the file at the current position. A node array is fully expected to start
|
||||
* at the current position.
|
||||
* This will recursively read other node arrays into the structure, populating the reverse
|
||||
* maps on the fly and using them to keep track of already read nodes.
|
||||
*
|
||||
* @param dictBuffer the buffer, correctly positioned at the start of a node array.
|
||||
* @param headerSize the size, in bytes, of the file header.
|
||||
* @param reverseNodeArrayMap a mapping from addresses to already read node arrays.
|
||||
* @param reverseGroupMap a mapping from addresses to already read character groups.
|
||||
* @param options file format options.
|
||||
* @return the read node array with all his children already read.
|
||||
*/
|
||||
private static PtNodeArray readNodeArray(final DictBuffer dictBuffer,
|
||||
final int headerSize, final Map<Integer, PtNodeArray> reverseNodeArrayMap,
|
||||
final Map<Integer, CharGroup> reverseGroupMap, final FormatOptions options)
|
||||
throws IOException {
|
||||
final ArrayList<CharGroup> nodeArrayContents = new ArrayList<CharGroup>();
|
||||
final int nodeArrayOrigin = dictBuffer.position() - headerSize;
|
||||
|
||||
do { // Scan the linked-list node.
|
||||
final int nodeArrayHeadPosition = dictBuffer.position() - headerSize;
|
||||
final int count = readCharGroupCount(dictBuffer);
|
||||
int groupOffset = nodeArrayHeadPosition + BinaryDictIOUtils.getGroupCountSize(count);
|
||||
for (int i = count; i > 0; --i) { // Scan the array of CharGroup.
|
||||
CharGroupInfo info = readCharGroup(dictBuffer, groupOffset, options);
|
||||
if (BinaryDictIOUtils.isMovedGroup(info.mFlags, options)) continue;
|
||||
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
|
||||
ArrayList<WeightedString> bigrams = null;
|
||||
if (null != info.mBigrams) {
|
||||
bigrams = new ArrayList<WeightedString>();
|
||||
for (PendingAttribute bigram : info.mBigrams) {
|
||||
final WeightedString word = getWordAtAddress(
|
||||
dictBuffer, headerSize, bigram.mAddress, options);
|
||||
final int reconstructedFrequency =
|
||||
BinaryDictIOUtils.reconstructBigramFrequency(word.mFrequency,
|
||||
bigram.mFrequency);
|
||||
bigrams.add(new WeightedString(word.mWord, reconstructedFrequency));
|
||||
}
|
||||
}
|
||||
if (BinaryDictIOUtils.hasChildrenAddress(info.mChildrenAddress)) {
|
||||
PtNodeArray children = reverseNodeArrayMap.get(info.mChildrenAddress);
|
||||
if (null == children) {
|
||||
final int currentPosition = dictBuffer.position();
|
||||
dictBuffer.position(info.mChildrenAddress + headerSize);
|
||||
children = readNodeArray(dictBuffer, headerSize, reverseNodeArrayMap,
|
||||
reverseGroupMap, options);
|
||||
dictBuffer.position(currentPosition);
|
||||
}
|
||||
nodeArrayContents.add(
|
||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
||||
info.mFrequency,
|
||||
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
||||
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED), children));
|
||||
} else {
|
||||
nodeArrayContents.add(
|
||||
new CharGroup(info.mCharacters, shortcutTargets, bigrams,
|
||||
info.mFrequency,
|
||||
0 != (info.mFlags & FormatSpec.FLAG_IS_NOT_A_WORD),
|
||||
0 != (info.mFlags & FormatSpec.FLAG_IS_BLACKLISTED)));
|
||||
}
|
||||
groupOffset = info.mEndAddress;
|
||||
}
|
||||
|
||||
// reach the end of the array.
|
||||
if (options.mSupportsDynamicUpdate) {
|
||||
final int nextAddress = dictBuffer.readUnsignedInt24();
|
||||
if (nextAddress >= 0 && nextAddress < dictBuffer.limit()) {
|
||||
dictBuffer.position(nextAddress);
|
||||
} else {
|
||||
break;
|
||||
}
|
||||
}
|
||||
} while (options.mSupportsDynamicUpdate &&
|
||||
dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||
|
||||
final PtNodeArray nodeArray = new PtNodeArray(nodeArrayContents);
|
||||
nodeArray.mCachedAddressBeforeUpdate = nodeArrayOrigin;
|
||||
nodeArray.mCachedAddressAfterUpdate = nodeArrayOrigin;
|
||||
reverseNodeArrayMap.put(nodeArray.mCachedAddressAfterUpdate, nodeArray);
|
||||
return nodeArray;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to get the binary format version from the header.
|
||||
* @throws IOException
|
||||
*/
|
||||
private static int getFormatVersion(final DictBuffer dictBuffer)
|
||||
throws IOException {
|
||||
final int magic = dictBuffer.readInt();
|
||||
if (FormatSpec.MAGIC_NUMBER == magic) return dictBuffer.readUnsignedShort();
|
||||
return FormatSpec.NOT_A_VERSION_NUMBER;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper function to get and validate the binary format version.
|
||||
* @throws UnsupportedFormatException
|
||||
* @throws IOException
|
||||
*/
|
||||
static int checkFormatVersion(final DictBuffer dictBuffer)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final int version = getFormatVersion(dictBuffer);
|
||||
if (version < FormatSpec.MINIMUM_SUPPORTED_VERSION
|
||||
|| version > FormatSpec.MAXIMUM_SUPPORTED_VERSION) {
|
||||
throw new UnsupportedFormatException("This file has version " + version
|
||||
+ ", but this implementation does not support versions above "
|
||||
+ FormatSpec.MAXIMUM_SUPPORTED_VERSION);
|
||||
}
|
||||
return version;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a header from a buffer.
|
||||
* @param headerReader the header reader
|
||||
* @throws IOException
|
||||
* @throws UnsupportedFormatException
|
||||
*/
|
||||
public static FileHeader readHeader(final HeaderReader headerReader)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final int version = headerReader.readVersion();
|
||||
final int optionsFlags = headerReader.readOptionFlags();
|
||||
|
||||
final int headerSize = headerReader.readHeaderSize();
|
||||
|
||||
if (headerSize < 0) {
|
||||
throw new UnsupportedFormatException("header size can't be negative.");
|
||||
}
|
||||
|
||||
final HashMap<String, String> attributes = headerReader.readAttributes(headerSize);
|
||||
|
||||
final FileHeader header = new FileHeader(headerSize,
|
||||
new FusionDictionary.DictionaryOptions(attributes,
|
||||
0 != (optionsFlags & FormatSpec.GERMAN_UMLAUT_PROCESSING_FLAG),
|
||||
0 != (optionsFlags & FormatSpec.FRENCH_LIGATURE_PROCESSING_FLAG)),
|
||||
new FormatOptions(version,
|
||||
0 != (optionsFlags & FormatSpec.SUPPORTS_DYNAMIC_UPDATE)));
|
||||
return header;
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads options from a buffer and populate a map with their contents.
|
||||
*
|
||||
* The buffer is read at the current position, so the caller must take care the pointer
|
||||
* is in the right place before calling this.
|
||||
*/
|
||||
public static void populateOptions(final DictBuffer dictBuffer,
|
||||
final int headerSize, final HashMap<String, String> options) {
|
||||
while (dictBuffer.position() < headerSize) {
|
||||
final String key = CharEncoding.readString(dictBuffer);
|
||||
final String value = CharEncoding.readString(dictBuffer);
|
||||
options.put(key, value);
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Reads a buffer and returns the memory representation of the dictionary.
|
||||
*
|
||||
* This high-level method takes a buffer and reads its contents, populating a
|
||||
* FusionDictionary structure. The optional dict argument is an existing dictionary to
|
||||
* which words from the buffer should be added. If it is null, a new dictionary is created.
|
||||
*
|
||||
* @param dictDecoder the dict decoder.
|
||||
* @param dict an optional dictionary to add words to, or null.
|
||||
* @return the created (or merged) dictionary.
|
||||
*/
|
||||
@UsedForTesting
|
||||
public static FusionDictionary readDictionaryBinary(final BinaryDictDecoder dictDecoder,
|
||||
final FusionDictionary dict) throws FileNotFoundException, IOException,
|
||||
UnsupportedFormatException {
|
||||
|
||||
// if the buffer has not been opened, open the buffer with bytebuffer.
|
||||
if (dictDecoder.getDictBuffer() == null) dictDecoder.openDictBuffer(
|
||||
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||
if (dictDecoder.getDictBuffer() == null) {
|
||||
MakedictLog.e("Cannot open the buffer");
|
||||
}
|
||||
|
||||
// Read header
|
||||
final FileHeader fileHeader = readHeader(dictDecoder);
|
||||
|
||||
Map<Integer, PtNodeArray> reverseNodeArrayMapping = new TreeMap<Integer, PtNodeArray>();
|
||||
Map<Integer, CharGroup> reverseGroupMapping = new TreeMap<Integer, CharGroup>();
|
||||
final PtNodeArray root = readNodeArray(dictDecoder.getDictBuffer(), fileHeader.mHeaderSize,
|
||||
reverseNodeArrayMapping, reverseGroupMapping, fileHeader.mFormatOptions);
|
||||
|
||||
FusionDictionary newDict = new FusionDictionary(root, fileHeader.mDictionaryOptions);
|
||||
if (null != dict) {
|
||||
for (final Word w : dict) {
|
||||
if (w.mIsBlacklistEntry) {
|
||||
newDict.addBlacklistEntry(w.mWord, w.mShortcutTargets, w.mIsNotAWord);
|
||||
} else {
|
||||
newDict.add(w.mWord, w.mFrequency, w.mShortcutTargets, w.mIsNotAWord);
|
||||
}
|
||||
}
|
||||
for (final Word w : dict) {
|
||||
// By construction a binary dictionary may not have bigrams pointing to
|
||||
// words that are not also registered as unigrams so we don't have to avoid
|
||||
// them explicitly here.
|
||||
for (final WeightedString bigram : w.mBigrams) {
|
||||
newDict.setBigram(w.mWord, bigram.mWord, bigram.mFrequency);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return newDict;
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to pass a file name instead of a File object to isBinaryDictionary.
|
||||
*/
|
||||
public static boolean isBinaryDictionary(final String filename) {
|
||||
final File file = new File(filename);
|
||||
return isBinaryDictionary(file);
|
||||
}
|
||||
|
||||
/**
|
||||
* Basic test to find out whether the file is a binary dictionary or not.
|
||||
*
|
||||
* Concretely this only tests the magic number.
|
||||
*
|
||||
* @param file The file to test.
|
||||
* @return true if it's a binary dictionary, false otherwise
|
||||
*/
|
||||
public static boolean isBinaryDictionary(final File file) {
|
||||
FileInputStream inStream = null;
|
||||
try {
|
||||
inStream = new FileInputStream(file);
|
||||
final ByteBuffer buffer = inStream.getChannel().map(
|
||||
FileChannel.MapMode.READ_ONLY, 0, file.length());
|
||||
final int version = getFormatVersion(new ByteBufferDictBuffer(buffer));
|
||||
return (version >= FormatSpec.MINIMUM_SUPPORTED_VERSION
|
||||
&& version <= FormatSpec.MAXIMUM_SUPPORTED_VERSION);
|
||||
} catch (FileNotFoundException e) {
|
||||
return false;
|
||||
} catch (IOException e) {
|
||||
return false;
|
||||
} finally {
|
||||
if (inStream != null) {
|
||||
try {
|
||||
inStream.close();
|
||||
} catch (IOException e) {
|
||||
// do nothing
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
|
@ -16,7 +16,7 @@
|
|||
|
||||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||
|
|
|
@ -18,13 +18,13 @@ package com.android.inputmethod.latin.makedict;
|
|||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.latin.Constants;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.CharEncoding;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
|
||||
import com.android.inputmethod.latin.utils.ByteArrayDictBuffer;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
|
@ -62,7 +62,7 @@ public final class BinaryDictIOUtils {
|
|||
* Retrieves all node arrays without recursive call.
|
||||
*/
|
||||
private static void readUnigramsAndBigramsBinaryInner(
|
||||
final FusionDictionaryBufferInterface buffer, final int headerSize,
|
||||
final DictBuffer dictBuffer, final int headerSize,
|
||||
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
|
||||
final Map<Integer, ArrayList<PendingAttribute>> bigrams,
|
||||
final FormatOptions formatOptions) {
|
||||
|
@ -82,11 +82,11 @@ public final class BinaryDictIOUtils {
|
|||
p.mNumOfCharGroup + ", position=" + p.mPosition + ", length=" + p.mLength);
|
||||
}
|
||||
|
||||
if (buffer.position() != p.mAddress) buffer.position(p.mAddress);
|
||||
if (dictBuffer.position() != p.mAddress) dictBuffer.position(p.mAddress);
|
||||
if (index != p.mLength) index = p.mLength;
|
||||
|
||||
if (p.mNumOfCharGroup == Position.NOT_READ_GROUPCOUNT) {
|
||||
p.mNumOfCharGroup = BinaryDictDecoder.readCharGroupCount(buffer);
|
||||
p.mNumOfCharGroup = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
|
||||
p.mAddress += getGroupCountSize(p.mNumOfCharGroup);
|
||||
p.mPosition = 0;
|
||||
}
|
||||
|
@ -94,7 +94,7 @@ public final class BinaryDictIOUtils {
|
|||
stack.pop();
|
||||
continue;
|
||||
}
|
||||
CharGroupInfo info = BinaryDictDecoder.readCharGroup(buffer,
|
||||
CharGroupInfo info = BinaryDictDecoderUtils.readCharGroup(dictBuffer,
|
||||
p.mAddress - headerSize, formatOptions);
|
||||
for (int i = 0; i < info.mCharacters.length; ++i) {
|
||||
pushedChars[index++] = info.mCharacters[i];
|
||||
|
@ -114,7 +114,7 @@ public final class BinaryDictIOUtils {
|
|||
|
||||
if (p.mPosition == p.mNumOfCharGroup) {
|
||||
if (formatOptions.mSupportsDynamicUpdate) {
|
||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
||||
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
||||
if (forwardLinkAddress != FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
||||
// The node array has a forward link.
|
||||
p.mNumOfCharGroup = Position.NOT_READ_GROUPCOUNT;
|
||||
|
@ -127,7 +127,7 @@ public final class BinaryDictIOUtils {
|
|||
}
|
||||
} else {
|
||||
// The node array has more groups.
|
||||
p.mAddress = buffer.position();
|
||||
p.mAddress = dictBuffer.position();
|
||||
}
|
||||
|
||||
if (!isMovedGroup && hasChildrenAddress(info.mChildrenAddress)) {
|
||||
|
@ -141,20 +141,20 @@ public final class BinaryDictIOUtils {
|
|||
* Reads unigrams and bigrams from the binary file.
|
||||
* Doesn't store a full memory representation of the dictionary.
|
||||
*
|
||||
* @param dictReader the dict reader.
|
||||
* @param dictDecoder the dict decoder.
|
||||
* @param words the map to store the address as a key and the word as a value.
|
||||
* @param frequencies the map to store the address as a key and the frequency as a value.
|
||||
* @param bigrams the map to store the address as a key and the list of address as a value.
|
||||
* @throws IOException if the file can't be read.
|
||||
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
||||
*/
|
||||
public static void readUnigramsAndBigramsBinary(final BinaryDictReader dictReader,
|
||||
public static void readUnigramsAndBigramsBinary(final BinaryDictDecoder dictDecoder,
|
||||
final Map<Integer, String> words, final Map<Integer, Integer> frequencies,
|
||||
final Map<Integer, ArrayList<PendingAttribute>> bigrams) throws IOException,
|
||||
UnsupportedFormatException {
|
||||
// Read header
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
|
||||
readUnigramsAndBigramsBinaryInner(dictReader.getBuffer(), header.mHeaderSize, words,
|
||||
final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||
readUnigramsAndBigramsBinaryInner(dictDecoder.getDictBuffer(), header.mHeaderSize, words,
|
||||
frequencies, bigrams, header.mFormatOptions);
|
||||
}
|
||||
|
||||
|
@ -162,32 +162,32 @@ public final class BinaryDictIOUtils {
|
|||
* Gets the address of the last CharGroup of the exact matching word in the dictionary.
|
||||
* If no match is found, returns NOT_VALID_WORD.
|
||||
*
|
||||
* @param dictReader the dict reader.
|
||||
* @param dictDecoder the dict decoder.
|
||||
* @param word the word we search for.
|
||||
* @return the address of the terminal node.
|
||||
* @throws IOException if the file can't be read.
|
||||
* @throws UnsupportedFormatException if the format of the file is not recognized.
|
||||
*/
|
||||
@UsedForTesting
|
||||
public static int getTerminalPosition(final BinaryDictReader dictReader,
|
||||
public static int getTerminalPosition(final BinaryDictDecoder dictDecoder,
|
||||
final String word) throws IOException, UnsupportedFormatException {
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
||||
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||
if (word == null) return FormatSpec.NOT_VALID_WORD;
|
||||
if (buffer.position() != 0) buffer.position(0);
|
||||
if (dictBuffer.position() != 0) dictBuffer.position(0);
|
||||
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
|
||||
final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||
int wordPos = 0;
|
||||
final int wordLen = word.codePointCount(0, word.length());
|
||||
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
|
||||
if (wordPos >= wordLen) return FormatSpec.NOT_VALID_WORD;
|
||||
|
||||
do {
|
||||
final int charGroupCount = BinaryDictDecoder.readCharGroupCount(buffer);
|
||||
final int charGroupCount = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
|
||||
boolean foundNextCharGroup = false;
|
||||
for (int i = 0; i < charGroupCount; ++i) {
|
||||
final int charGroupPos = buffer.position();
|
||||
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer,
|
||||
buffer.position(), header.mFormatOptions);
|
||||
final int charGroupPos = dictBuffer.position();
|
||||
final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(
|
||||
dictBuffer, dictBuffer.position(), header.mFormatOptions);
|
||||
final boolean isMovedGroup = isMovedGroup(currentInfo.mFlags,
|
||||
header.mFormatOptions);
|
||||
final boolean isDeletedGroup = isDeletedGroup(currentInfo.mFlags,
|
||||
|
@ -219,7 +219,7 @@ public final class BinaryDictIOUtils {
|
|||
return FormatSpec.NOT_VALID_WORD;
|
||||
}
|
||||
foundNextCharGroup = true;
|
||||
buffer.position(currentInfo.mChildrenAddress);
|
||||
dictBuffer.position(currentInfo.mChildrenAddress);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
@ -233,11 +233,11 @@ public final class BinaryDictIOUtils {
|
|||
return FormatSpec.NOT_VALID_WORD;
|
||||
}
|
||||
|
||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
||||
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
||||
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
||||
return FormatSpec.NOT_VALID_WORD;
|
||||
}
|
||||
buffer.position(forwardLinkAddress);
|
||||
dictBuffer.position(forwardLinkAddress);
|
||||
} while(true);
|
||||
}
|
||||
return FormatSpec.NOT_VALID_WORD;
|
||||
|
@ -246,12 +246,12 @@ public final class BinaryDictIOUtils {
|
|||
/**
|
||||
* @return the size written, in bytes. Always 3 bytes.
|
||||
*/
|
||||
static int writeSInt24ToBuffer(final FusionDictionaryBufferInterface buffer,
|
||||
static int writeSInt24ToBuffer(final DictBuffer dictBuffer,
|
||||
final int value) {
|
||||
final int absValue = Math.abs(value);
|
||||
buffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
|
||||
buffer.put((byte)((absValue >> 8) & 0xFF));
|
||||
buffer.put((byte)(absValue & 0xFF));
|
||||
dictBuffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
|
||||
dictBuffer.put((byte)((absValue >> 8) & 0xFF));
|
||||
dictBuffer.put((byte)(absValue & 0xFF));
|
||||
return 3;
|
||||
}
|
||||
|
||||
|
@ -289,31 +289,31 @@ public final class BinaryDictIOUtils {
|
|||
return BinaryDictEncoder.getByteSize(value);
|
||||
}
|
||||
|
||||
static void skipCharGroup(final FusionDictionaryBufferInterface buffer,
|
||||
static void skipCharGroup(final DictBuffer dictBuffer,
|
||||
final FormatOptions formatOptions) {
|
||||
final int flags = buffer.readUnsignedByte();
|
||||
BinaryDictDecoder.readParentAddress(buffer, formatOptions);
|
||||
skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
|
||||
BinaryDictDecoder.readChildrenAddress(buffer, flags, formatOptions);
|
||||
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte();
|
||||
final int flags = dictBuffer.readUnsignedByte();
|
||||
BinaryDictDecoderUtils.readParentAddress(dictBuffer, formatOptions);
|
||||
skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
|
||||
BinaryDictDecoderUtils.readChildrenAddress(dictBuffer, flags, formatOptions);
|
||||
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) dictBuffer.readUnsignedByte();
|
||||
if ((flags & FormatSpec.FLAG_HAS_SHORTCUT_TARGETS) != 0) {
|
||||
final int shortcutsSize = buffer.readUnsignedShort();
|
||||
buffer.position(buffer.position() + shortcutsSize
|
||||
final int shortcutsSize = dictBuffer.readUnsignedShort();
|
||||
dictBuffer.position(dictBuffer.position() + shortcutsSize
|
||||
- FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE);
|
||||
}
|
||||
if ((flags & FormatSpec.FLAG_HAS_BIGRAMS) != 0) {
|
||||
int bigramCount = 0;
|
||||
while (bigramCount++ < FormatSpec.MAX_BIGRAMS_IN_A_GROUP) {
|
||||
final int bigramFlags = buffer.readUnsignedByte();
|
||||
final int bigramFlags = dictBuffer.readUnsignedByte();
|
||||
switch (bigramFlags & FormatSpec.MASK_ATTRIBUTE_ADDRESS_TYPE) {
|
||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
|
||||
buffer.readUnsignedByte();
|
||||
dictBuffer.readUnsignedByte();
|
||||
break;
|
||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
|
||||
buffer.readUnsignedShort();
|
||||
dictBuffer.readUnsignedShort();
|
||||
break;
|
||||
case FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
|
||||
buffer.readUnsignedInt24();
|
||||
dictBuffer.readUnsignedInt24();
|
||||
break;
|
||||
}
|
||||
if ((bigramFlags & FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT) == 0) break;
|
||||
|
@ -324,15 +324,15 @@ public final class BinaryDictIOUtils {
|
|||
}
|
||||
}
|
||||
|
||||
static void skipString(final FusionDictionaryBufferInterface buffer,
|
||||
static void skipString(final DictBuffer dictBuffer,
|
||||
final boolean hasMultipleChars) {
|
||||
if (hasMultipleChars) {
|
||||
int character = CharEncoding.readChar(buffer);
|
||||
int character = CharEncoding.readChar(dictBuffer);
|
||||
while (character != FormatSpec.INVALID_CHARACTER) {
|
||||
character = CharEncoding.readChar(buffer);
|
||||
character = CharEncoding.readChar(dictBuffer);
|
||||
}
|
||||
} else {
|
||||
CharEncoding.readChar(buffer);
|
||||
CharEncoding.readChar(dictBuffer);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -508,24 +508,25 @@ public final class BinaryDictIOUtils {
|
|||
}
|
||||
|
||||
/**
|
||||
* Find a word using the BinaryDictReader.
|
||||
* Find a word using the BinaryDictDecoder.
|
||||
*
|
||||
* @param dictReader the dict reader
|
||||
* @param dictDecoder the dict decoder
|
||||
* @param word the word searched
|
||||
* @return the found group
|
||||
* @throws IOException
|
||||
* @throws UnsupportedFormatException
|
||||
*/
|
||||
@UsedForTesting
|
||||
public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictReader dictReader,
|
||||
public static CharGroupInfo findWordByBinaryDictReader(final BinaryDictDecoder dictDecoder,
|
||||
final String word) throws IOException, UnsupportedFormatException {
|
||||
int position = getTerminalPosition(dictReader, word);
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
||||
int position = getTerminalPosition(dictDecoder, word);
|
||||
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||
if (position != FormatSpec.NOT_VALID_WORD) {
|
||||
buffer.position(0);
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
|
||||
buffer.position(position);
|
||||
return BinaryDictDecoder.readCharGroup(buffer, position, header.mFormatOptions);
|
||||
dictBuffer.position(0);
|
||||
final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||
dictBuffer.position(position);
|
||||
return BinaryDictDecoderUtils.readCharGroup(dictBuffer, position,
|
||||
header.mFormatOptions);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
@ -544,21 +545,21 @@ public final class BinaryDictIOUtils {
|
|||
final File file, final long offset, final long length)
|
||||
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
||||
final byte[] buffer = new byte[HEADER_READING_BUFFER_SIZE];
|
||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
||||
dictReader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFactory() {
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||
dictDecoder.openDictBuffer(new BinaryDictDecoder.DictionaryBufferFactory() {
|
||||
@Override
|
||||
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(File file)
|
||||
public DictBuffer getDictionaryBuffer(File file)
|
||||
throws FileNotFoundException, IOException {
|
||||
final FileInputStream inStream = new FileInputStream(file);
|
||||
try {
|
||||
inStream.read(buffer);
|
||||
return new ByteArrayWrapper(buffer);
|
||||
return new ByteArrayDictBuffer(buffer);
|
||||
} finally {
|
||||
inStream.close();
|
||||
}
|
||||
}
|
||||
});
|
||||
return BinaryDictDecoder.readHeader(dictReader);
|
||||
return BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||
}
|
||||
|
||||
public static FileHeader getDictionaryFileHeaderOrNull(final File file, final long offset,
|
||||
|
@ -636,4 +637,19 @@ public final class BinaryDictIOUtils {
|
|||
return 0;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Calculate bigram frequency from compressed value
|
||||
*
|
||||
* @param unigramFrequency
|
||||
* @param bigramFrequency compressed frequency
|
||||
* @return approximate bigram frequency
|
||||
*/
|
||||
public static int reconstructBigramFrequency(final int unigramFrequency,
|
||||
final int bigramFrequency) {
|
||||
final float stepSize = (FormatSpec.MAX_TERMINAL_FREQUENCY - unigramFrequency)
|
||||
/ (1.5f + FormatSpec.MAX_BIGRAM_FREQUENCY);
|
||||
final float resultFreqFloat = unigramFrequency + stepSize * (bigramFrequency + 1.0f);
|
||||
return (int)resultFreqFloat;
|
||||
}
|
||||
}
|
||||
|
|
|
@ -1,169 +0,0 @@
|
|||
/*
|
||||
* Copyright (C) 2013 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.CharEncoding;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
||||
import com.android.inputmethod.latin.makedict.decoder.HeaderReaderInterface;
|
||||
import com.android.inputmethod.latin.utils.ByteArrayWrapper;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.io.RandomAccessFile;
|
||||
import java.nio.ByteBuffer;
|
||||
import java.nio.channels.FileChannel;
|
||||
import java.util.HashMap;
|
||||
|
||||
public class BinaryDictReader implements HeaderReaderInterface {
|
||||
|
||||
public interface FusionDictionaryBufferFactory {
|
||||
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
|
||||
throws FileNotFoundException, IOException;
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates FusionDictionaryBuffer from a ByteBuffer
|
||||
*/
|
||||
public static final class FusionDictionaryBufferFromByteBufferFactory
|
||||
implements FusionDictionaryBufferFactory {
|
||||
@Override
|
||||
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
|
||||
throws FileNotFoundException, IOException {
|
||||
FileInputStream inStream = null;
|
||||
ByteBuffer buffer = null;
|
||||
try {
|
||||
inStream = new FileInputStream(file);
|
||||
buffer = inStream.getChannel().map(FileChannel.MapMode.READ_ONLY,
|
||||
0, file.length());
|
||||
} finally {
|
||||
if (inStream != null) {
|
||||
inStream.close();
|
||||
}
|
||||
}
|
||||
if (buffer != null) {
|
||||
return new BinaryDictDecoder.ByteBufferWrapper(buffer);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates FusionDictionaryBuffer from a byte array
|
||||
*/
|
||||
public static final class FusionDictionaryBufferFromByteArrayFactory
|
||||
implements FusionDictionaryBufferFactory {
|
||||
@Override
|
||||
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
|
||||
throws FileNotFoundException, IOException {
|
||||
FileInputStream inStream = null;
|
||||
try {
|
||||
inStream = new FileInputStream(file);
|
||||
final byte[] array = new byte[(int) file.length()];
|
||||
inStream.read(array);
|
||||
return new ByteArrayWrapper(array);
|
||||
} finally {
|
||||
if (inStream != null) {
|
||||
inStream.close();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Creates FusionDictionaryBuffer from a RandomAccessFile.
|
||||
*/
|
||||
@UsedForTesting
|
||||
public static final class FusionDictionaryBufferFromWritableByteBufferFactory
|
||||
implements FusionDictionaryBufferFactory {
|
||||
@Override
|
||||
public FusionDictionaryBufferInterface getFusionDictionaryBuffer(final File file)
|
||||
throws FileNotFoundException, IOException {
|
||||
RandomAccessFile raFile = null;
|
||||
ByteBuffer buffer = null;
|
||||
try {
|
||||
raFile = new RandomAccessFile(file, "rw");
|
||||
buffer = raFile.getChannel().map(FileChannel.MapMode.READ_WRITE, 0, file.length());
|
||||
} finally {
|
||||
if (raFile != null) {
|
||||
raFile.close();
|
||||
}
|
||||
}
|
||||
if (buffer != null) {
|
||||
return new BinaryDictDecoder.ByteBufferWrapper(buffer);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
private final File mDictionaryBinaryFile;
|
||||
private FusionDictionaryBufferInterface mFusionDictionaryBuffer;
|
||||
|
||||
public BinaryDictReader(final File file) {
|
||||
mDictionaryBinaryFile = file;
|
||||
mFusionDictionaryBuffer = null;
|
||||
}
|
||||
|
||||
public void openBuffer(final FusionDictionaryBufferFactory factory)
|
||||
throws FileNotFoundException, IOException {
|
||||
mFusionDictionaryBuffer = factory.getFusionDictionaryBuffer(mDictionaryBinaryFile);
|
||||
}
|
||||
|
||||
public FusionDictionaryBufferInterface getBuffer() {
|
||||
return mFusionDictionaryBuffer;
|
||||
}
|
||||
|
||||
@UsedForTesting
|
||||
public FusionDictionaryBufferInterface openAndGetBuffer(
|
||||
final FusionDictionaryBufferFactory factory)
|
||||
throws FileNotFoundException, IOException {
|
||||
openBuffer(factory);
|
||||
return getBuffer();
|
||||
}
|
||||
|
||||
// The implementation of HeaderReaderInterface
|
||||
@Override
|
||||
public int readVersion() throws IOException, UnsupportedFormatException {
|
||||
return BinaryDictDecoder.checkFormatVersion(mFusionDictionaryBuffer);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readOptionFlags() {
|
||||
return mFusionDictionaryBuffer.readUnsignedShort();
|
||||
}
|
||||
|
||||
@Override
|
||||
public int readHeaderSize() {
|
||||
return mFusionDictionaryBuffer.readInt();
|
||||
}
|
||||
|
||||
@Override
|
||||
public HashMap<String, String> readAttributes(final int headerSize) {
|
||||
final HashMap<String, String> attributes = new HashMap<String, String>();
|
||||
while (mFusionDictionaryBuffer.position() < headerSize) {
|
||||
// We can avoid infinite loop here since mFusionDictonary.position() is always increased
|
||||
// by calling CharEncoding.readString.
|
||||
final String key = CharEncoding.readString(mFusionDictionaryBuffer);
|
||||
final String value = CharEncoding.readString(mFusionDictionaryBuffer);
|
||||
attributes.put(key, value);
|
||||
}
|
||||
mFusionDictionaryBuffer.position(headerSize);
|
||||
return attributes;
|
||||
}
|
||||
}
|
|
@ -18,7 +18,7 @@ package com.android.inputmethod.latin.makedict;
|
|||
|
||||
import com.android.inputmethod.annotations.UsedForTesting;
|
||||
import com.android.inputmethod.latin.Constants;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
|
@ -49,142 +49,146 @@ public final class DynamicBinaryDictIOUtils {
|
|||
/**
|
||||
* Delete the word from the binary file.
|
||||
*
|
||||
* @param dictReader the dict reader.
|
||||
* @param dictDecoder the dict decoder.
|
||||
* @param word the word we delete
|
||||
* @throws IOException
|
||||
* @throws UnsupportedFormatException
|
||||
*/
|
||||
@UsedForTesting
|
||||
public static void deleteWord(final BinaryDictReader dictReader, final String word)
|
||||
public static void deleteWord(final BinaryDictDecoder dictDecoder, final String word)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
||||
buffer.position(0);
|
||||
final FileHeader header = BinaryDictDecoder.readHeader(dictReader);
|
||||
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictReader, word);
|
||||
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||
dictBuffer.position(0);
|
||||
final FileHeader header = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||
final int wordPosition = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
|
||||
if (wordPosition == FormatSpec.NOT_VALID_WORD) return;
|
||||
|
||||
buffer.position(wordPosition);
|
||||
final int flags = buffer.readUnsignedByte();
|
||||
buffer.position(wordPosition);
|
||||
buffer.put((byte)markAsDeleted(flags));
|
||||
dictBuffer.position(wordPosition);
|
||||
final int flags = dictBuffer.readUnsignedByte();
|
||||
dictBuffer.position(wordPosition);
|
||||
dictBuffer.put((byte)markAsDeleted(flags));
|
||||
}
|
||||
|
||||
/**
|
||||
* Update a parent address in a CharGroup that is referred to by groupOriginAddress.
|
||||
*
|
||||
* @param buffer the buffer to write.
|
||||
* @param dictBuffer the DictBuffer to write.
|
||||
* @param groupOriginAddress the address of the group.
|
||||
* @param newParentAddress the absolute address of the parent.
|
||||
* @param formatOptions file format options.
|
||||
*/
|
||||
public static void updateParentAddress(final FusionDictionaryBufferInterface buffer,
|
||||
public static void updateParentAddress(final DictBuffer dictBuffer,
|
||||
final int groupOriginAddress, final int newParentAddress,
|
||||
final FormatOptions formatOptions) {
|
||||
final int originalPosition = buffer.position();
|
||||
buffer.position(groupOriginAddress);
|
||||
final int originalPosition = dictBuffer.position();
|
||||
dictBuffer.position(groupOriginAddress);
|
||||
if (!formatOptions.mSupportsDynamicUpdate) {
|
||||
throw new RuntimeException("this file format does not support parent addresses");
|
||||
}
|
||||
final int flags = buffer.readUnsignedByte();
|
||||
final int flags = dictBuffer.readUnsignedByte();
|
||||
if (BinaryDictIOUtils.isMovedGroup(flags, formatOptions)) {
|
||||
// If the group is moved, the parent address is stored in the destination group.
|
||||
// We are guaranteed to process the destination group later, so there is no need to
|
||||
// update anything here.
|
||||
buffer.position(originalPosition);
|
||||
dictBuffer.position(originalPosition);
|
||||
return;
|
||||
}
|
||||
if (DBG) {
|
||||
MakedictLog.d("update parent address flags=" + flags + ", " + groupOriginAddress);
|
||||
}
|
||||
final int parentOffset = newParentAddress - groupOriginAddress;
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, parentOffset);
|
||||
buffer.position(originalPosition);
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, parentOffset);
|
||||
dictBuffer.position(originalPosition);
|
||||
}
|
||||
|
||||
/**
|
||||
* Update parent addresses in a node array stored at nodeOriginAddress.
|
||||
*
|
||||
* @param buffer the buffer to be modified.
|
||||
* @param dictBuffer the DictBuffer to be modified.
|
||||
* @param nodeOriginAddress the address of the node array to update.
|
||||
* @param newParentAddress the address to be written.
|
||||
* @param formatOptions file format options.
|
||||
*/
|
||||
public static void updateParentAddresses(final FusionDictionaryBufferInterface buffer,
|
||||
public static void updateParentAddresses(final DictBuffer dictBuffer,
|
||||
final int nodeOriginAddress, final int newParentAddress,
|
||||
final FormatOptions formatOptions) {
|
||||
final int originalPosition = buffer.position();
|
||||
buffer.position(nodeOriginAddress);
|
||||
final int originalPosition = dictBuffer.position();
|
||||
dictBuffer.position(nodeOriginAddress);
|
||||
do {
|
||||
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
|
||||
final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
updateParentAddress(buffer, buffer.position(), newParentAddress, formatOptions);
|
||||
BinaryDictIOUtils.skipCharGroup(buffer, formatOptions);
|
||||
updateParentAddress(dictBuffer, dictBuffer.position(), newParentAddress,
|
||||
formatOptions);
|
||||
BinaryDictIOUtils.skipCharGroup(dictBuffer, formatOptions);
|
||||
}
|
||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
||||
buffer.position(forwardLinkAddress);
|
||||
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
||||
dictBuffer.position(forwardLinkAddress);
|
||||
} while (formatOptions.mSupportsDynamicUpdate
|
||||
&& buffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||
buffer.position(originalPosition);
|
||||
&& dictBuffer.position() != FormatSpec.NO_FORWARD_LINK_ADDRESS);
|
||||
dictBuffer.position(originalPosition);
|
||||
}
|
||||
|
||||
/**
|
||||
* Update a children address in a CharGroup that is addressed by groupOriginAddress.
|
||||
*
|
||||
* @param buffer the buffer to write.
|
||||
* @param dictBuffer the DictBuffer to write.
|
||||
* @param groupOriginAddress the address of the group.
|
||||
* @param newChildrenAddress the absolute address of the child.
|
||||
* @param formatOptions file format options.
|
||||
*/
|
||||
public static void updateChildrenAddress(final FusionDictionaryBufferInterface buffer,
|
||||
public static void updateChildrenAddress(final DictBuffer dictBuffer,
|
||||
final int groupOriginAddress, final int newChildrenAddress,
|
||||
final FormatOptions formatOptions) {
|
||||
final int originalPosition = buffer.position();
|
||||
buffer.position(groupOriginAddress);
|
||||
final int flags = buffer.readUnsignedByte();
|
||||
final int parentAddress = BinaryDictDecoder.readParentAddress(buffer, formatOptions);
|
||||
BinaryDictIOUtils.skipString(buffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
|
||||
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte();
|
||||
final int originalPosition = dictBuffer.position();
|
||||
dictBuffer.position(groupOriginAddress);
|
||||
final int flags = dictBuffer.readUnsignedByte();
|
||||
final int parentAddress = BinaryDictDecoderUtils.readParentAddress(dictBuffer,
|
||||
formatOptions);
|
||||
BinaryDictIOUtils.skipString(dictBuffer, (flags & FormatSpec.FLAG_HAS_MULTIPLE_CHARS) != 0);
|
||||
if ((flags & FormatSpec.FLAG_IS_TERMINAL) != 0) dictBuffer.readUnsignedByte();
|
||||
final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS
|
||||
? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - buffer.position();
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, childrenOffset);
|
||||
buffer.position(originalPosition);
|
||||
? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - dictBuffer.position();
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, childrenOffset);
|
||||
dictBuffer.position(originalPosition);
|
||||
}
|
||||
|
||||
/**
|
||||
* Helper method to move a char group to the tail of the file.
|
||||
*/
|
||||
private static int moveCharGroup(final OutputStream destination,
|
||||
final FusionDictionaryBufferInterface buffer, final CharGroupInfo info,
|
||||
final DictBuffer dictBuffer, final CharGroupInfo info,
|
||||
final int nodeArrayOriginAddress, final int oldGroupAddress,
|
||||
final FormatOptions formatOptions) throws IOException {
|
||||
updateParentAddress(buffer, oldGroupAddress, buffer.limit() + 1, formatOptions);
|
||||
buffer.position(oldGroupAddress);
|
||||
final int currentFlags = buffer.readUnsignedByte();
|
||||
buffer.position(oldGroupAddress);
|
||||
buffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
|
||||
updateParentAddress(dictBuffer, oldGroupAddress, dictBuffer.limit() + 1, formatOptions);
|
||||
dictBuffer.position(oldGroupAddress);
|
||||
final int currentFlags = dictBuffer.readUnsignedByte();
|
||||
dictBuffer.position(oldGroupAddress);
|
||||
dictBuffer.put((byte)(FormatSpec.FLAG_IS_MOVED | (currentFlags
|
||||
& (~FormatSpec.MASK_MOVE_AND_DELETE_FLAG))));
|
||||
int size = FormatSpec.GROUP_FLAGS_SIZE;
|
||||
updateForwardLink(buffer, nodeArrayOriginAddress, buffer.limit(), formatOptions);
|
||||
updateForwardLink(dictBuffer, nodeArrayOriginAddress, dictBuffer.limit(), formatOptions);
|
||||
size += BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { info });
|
||||
return size;
|
||||
}
|
||||
|
||||
@SuppressWarnings("unused")
|
||||
private static void updateForwardLink(final FusionDictionaryBufferInterface buffer,
|
||||
private static void updateForwardLink(final DictBuffer dictBuffer,
|
||||
final int nodeArrayOriginAddress, final int newNodeArrayAddress,
|
||||
final FormatOptions formatOptions) {
|
||||
buffer.position(nodeArrayOriginAddress);
|
||||
dictBuffer.position(nodeArrayOriginAddress);
|
||||
int jumpCount = 0;
|
||||
while (jumpCount++ < MAX_JUMPS) {
|
||||
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
|
||||
for (int i = 0; i < count; ++i) BinaryDictIOUtils.skipCharGroup(buffer, formatOptions);
|
||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
||||
final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
BinaryDictIOUtils.skipCharGroup(dictBuffer, formatOptions);
|
||||
}
|
||||
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
||||
if (forwardLinkAddress == FormatSpec.NO_FORWARD_LINK_ADDRESS) {
|
||||
buffer.position(buffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeArrayAddress);
|
||||
dictBuffer.position(dictBuffer.position() - FormatSpec.FORWARD_LINK_ADDRESS_SIZE);
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeArrayAddress);
|
||||
return;
|
||||
}
|
||||
buffer.position(forwardLinkAddress);
|
||||
dictBuffer.position(forwardLinkAddress);
|
||||
}
|
||||
if (DBG && jumpCount >= MAX_JUMPS) {
|
||||
throw new RuntimeException("too many jumps, probably a bug.");
|
||||
|
@ -204,7 +208,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
* @param shortcutTargets the shortcut targets for this group.
|
||||
* @param bigrams the bigrams for this group.
|
||||
* @param destination the stream representing the tail of the file.
|
||||
* @param buffer the buffer representing the (constant-size) body of the file.
|
||||
* @param dictBuffer the DictBuffer representing the (constant-size) body of the file.
|
||||
* @param oldNodeArrayOrigin the origin of the old node array this group was a part of.
|
||||
* @param oldGroupOrigin the old origin where this group used to be stored.
|
||||
* @param formatOptions format options for this dictionary.
|
||||
|
@ -215,7 +219,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
final int length, final int flags, final int frequency, final int parentAddress,
|
||||
final ArrayList<WeightedString> shortcutTargets,
|
||||
final ArrayList<PendingAttribute> bigrams, final OutputStream destination,
|
||||
final FusionDictionaryBufferInterface buffer, final int oldNodeArrayOrigin,
|
||||
final DictBuffer dictBuffer, final int oldNodeArrayOrigin,
|
||||
final int oldGroupOrigin, final FormatOptions formatOptions) throws IOException {
|
||||
int size = 0;
|
||||
final int newGroupOrigin = fileEndAddress + 1;
|
||||
|
@ -228,7 +232,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
flags, writtenCharacters, frequency, parentAddress,
|
||||
fileEndAddress + 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE, shortcutTargets,
|
||||
bigrams);
|
||||
moveCharGroup(destination, buffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin,
|
||||
moveCharGroup(destination, dictBuffer, newInfo, oldNodeArrayOrigin, oldGroupOrigin,
|
||||
formatOptions);
|
||||
return 1 + size + FormatSpec.FORWARD_LINK_ADDRESS_SIZE;
|
||||
}
|
||||
|
@ -236,7 +240,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
/**
|
||||
* Insert a word into a binary dictionary.
|
||||
*
|
||||
* @param dictReader the dict reader.
|
||||
* @param dictDecoder the dict decoder.
|
||||
* @param destination a stream to the underlying file, with the pointer at the end of the file.
|
||||
* @param word the word to insert.
|
||||
* @param frequency the frequency of the new word.
|
||||
|
@ -249,16 +253,17 @@ public final class DynamicBinaryDictIOUtils {
|
|||
// TODO: Support batch insertion.
|
||||
// TODO: Remove @UsedForTesting once UserHistoryDictionary is implemented by BinaryDictionary.
|
||||
@UsedForTesting
|
||||
public static void insertWord(final BinaryDictReader dictReader, final OutputStream destination,
|
||||
final String word, final int frequency, final ArrayList<WeightedString> bigramStrings,
|
||||
public static void insertWord(final BinaryDictDecoder dictDecoder,
|
||||
final OutputStream destination, final String word, final int frequency,
|
||||
final ArrayList<WeightedString> bigramStrings,
|
||||
final ArrayList<WeightedString> shortcuts, final boolean isNotAWord,
|
||||
final boolean isBlackListEntry)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final ArrayList<PendingAttribute> bigrams = new ArrayList<PendingAttribute>();
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
||||
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||
if (bigramStrings != null) {
|
||||
for (final WeightedString bigram : bigramStrings) {
|
||||
int position = BinaryDictIOUtils.getTerminalPosition(dictReader, bigram.mWord);
|
||||
int position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, bigram.mWord);
|
||||
if (position == FormatSpec.NOT_VALID_WORD) {
|
||||
// TODO: figure out what is the correct thing to do here.
|
||||
} else {
|
||||
|
@ -272,24 +277,24 @@ public final class DynamicBinaryDictIOUtils {
|
|||
final boolean hasShortcuts = shortcuts != null && !shortcuts.isEmpty();
|
||||
|
||||
// find the insert position of the word.
|
||||
if (buffer.position() != 0) buffer.position(0);
|
||||
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
|
||||
if (dictBuffer.position() != 0) dictBuffer.position(0);
|
||||
final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||
|
||||
int wordPos = 0, address = buffer.position(), nodeOriginAddress = buffer.position();
|
||||
int wordPos = 0, address = dictBuffer.position(), nodeOriginAddress = dictBuffer.position();
|
||||
final int[] codePoints = FusionDictionary.getCodePoints(word);
|
||||
final int wordLen = codePoints.length;
|
||||
|
||||
for (int depth = 0; depth < Constants.DICTIONARY_MAX_WORD_LENGTH; ++depth) {
|
||||
if (wordPos >= wordLen) break;
|
||||
nodeOriginAddress = buffer.position();
|
||||
nodeOriginAddress = dictBuffer.position();
|
||||
int nodeParentAddress = -1;
|
||||
final int charGroupCount = BinaryDictDecoder.readCharGroupCount(buffer);
|
||||
final int charGroupCount = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
|
||||
boolean foundNextGroup = false;
|
||||
|
||||
for (int i = 0; i < charGroupCount; ++i) {
|
||||
address = buffer.position();
|
||||
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer,
|
||||
buffer.position(), fileHeader.mFormatOptions);
|
||||
address = dictBuffer.position();
|
||||
final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(dictBuffer,
|
||||
dictBuffer.position(), fileHeader.mFormatOptions);
|
||||
final boolean isMovedGroup = BinaryDictIOUtils.isMovedGroup(currentInfo.mFlags,
|
||||
fileHeader.mFormatOptions);
|
||||
if (isMovedGroup) continue;
|
||||
|
@ -308,18 +313,18 @@ public final class DynamicBinaryDictIOUtils {
|
|||
* after
|
||||
* abc - d - ef
|
||||
*/
|
||||
final int newNodeAddress = buffer.limit();
|
||||
final int newNodeAddress = dictBuffer.limit();
|
||||
final int flags = BinaryDictEncoder.makeCharGroupFlags(p > 1,
|
||||
isTerminal, 0, hasShortcuts, hasBigrams, false /* isNotAWord */,
|
||||
false /* isBlackListEntry */, fileHeader.mFormatOptions);
|
||||
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p, flags,
|
||||
frequency, nodeParentAddress, shortcuts, bigrams, destination,
|
||||
buffer, nodeOriginAddress, address, fileHeader.mFormatOptions);
|
||||
dictBuffer, nodeOriginAddress, address, fileHeader.mFormatOptions);
|
||||
|
||||
final int[] characters2 = Arrays.copyOfRange(currentInfo.mCharacters, p,
|
||||
currentInfo.mCharacters.length);
|
||||
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||
updateParentAddresses(buffer, currentInfo.mChildrenAddress,
|
||||
updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress,
|
||||
newNodeAddress + written + 1, fileHeader.mFormatOptions);
|
||||
}
|
||||
final CharGroupInfo newInfo2 = new CharGroupInfo(
|
||||
|
@ -344,7 +349,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
* - c
|
||||
*/
|
||||
|
||||
final int newNodeAddress = buffer.limit();
|
||||
final int newNodeAddress = dictBuffer.limit();
|
||||
final int childrenAddress = currentInfo.mChildrenAddress;
|
||||
|
||||
// move prefix
|
||||
|
@ -355,13 +360,13 @@ public final class DynamicBinaryDictIOUtils {
|
|||
fileHeader.mFormatOptions);
|
||||
int written = moveGroup(newNodeAddress, currentInfo.mCharacters, p,
|
||||
prefixFlags, -1 /* frequency */, nodeParentAddress, null, null,
|
||||
destination, buffer, nodeOriginAddress, address,
|
||||
destination, dictBuffer, nodeOriginAddress, address,
|
||||
fileHeader.mFormatOptions);
|
||||
|
||||
final int[] suffixCharacters = Arrays.copyOfRange(
|
||||
currentInfo.mCharacters, p, currentInfo.mCharacters.length);
|
||||
if (currentInfo.mChildrenAddress != FormatSpec.NO_CHILDREN_ADDRESS) {
|
||||
updateParentAddresses(buffer, currentInfo.mChildrenAddress,
|
||||
updateParentAddresses(dictBuffer, currentInfo.mChildrenAddress,
|
||||
newNodeAddress + written + 1, fileHeader.mFormatOptions);
|
||||
}
|
||||
final int suffixFlags = BinaryDictEncoder.makeCharGroupFlags(
|
||||
|
@ -403,7 +408,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
if (wordPos + currentInfo.mCharacters.length == wordLen) {
|
||||
// the word exists in the dictionary.
|
||||
// only update group.
|
||||
final int newNodeAddress = buffer.limit();
|
||||
final int newNodeAddress = dictBuffer.limit();
|
||||
final boolean hasMultipleChars = currentInfo.mCharacters.length > 1;
|
||||
final int flags = BinaryDictEncoder.makeCharGroupFlags(hasMultipleChars,
|
||||
isTerminal, 0 /* childrenAddressSize */, hasShortcuts, hasBigrams,
|
||||
|
@ -412,7 +417,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
-1 /* endAddress */, flags, currentInfo.mCharacters, frequency,
|
||||
nodeParentAddress, currentInfo.mChildrenAddress, shortcuts,
|
||||
bigrams);
|
||||
moveCharGroup(destination, buffer, newInfo, nodeOriginAddress, address,
|
||||
moveCharGroup(destination, dictBuffer, newInfo, nodeOriginAddress, address,
|
||||
fileHeader.mFormatOptions);
|
||||
return;
|
||||
}
|
||||
|
@ -430,8 +435,8 @@ public final class DynamicBinaryDictIOUtils {
|
|||
* after
|
||||
* ab - cd - e
|
||||
*/
|
||||
final int newNodeAddress = buffer.limit();
|
||||
updateChildrenAddress(buffer, address, newNodeAddress,
|
||||
final int newNodeAddress = dictBuffer.limit();
|
||||
updateChildrenAddress(dictBuffer, address, newNodeAddress,
|
||||
fileHeader.mFormatOptions);
|
||||
final int newGroupAddress = newNodeAddress + 1;
|
||||
final boolean hasMultipleChars = (wordLen - wordPos) > 1;
|
||||
|
@ -445,7 +450,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
BinaryDictIOUtils.writeNodes(destination, new CharGroupInfo[] { newInfo });
|
||||
return;
|
||||
}
|
||||
buffer.position(currentInfo.mChildrenAddress);
|
||||
dictBuffer.position(currentInfo.mChildrenAddress);
|
||||
foundNextGroup = true;
|
||||
break;
|
||||
}
|
||||
|
@ -454,8 +459,8 @@ public final class DynamicBinaryDictIOUtils {
|
|||
if (foundNextGroup) continue;
|
||||
|
||||
// reached the end of the array.
|
||||
final int linkAddressPosition = buffer.position();
|
||||
int nextLink = buffer.readUnsignedInt24();
|
||||
final int linkAddressPosition = dictBuffer.position();
|
||||
int nextLink = dictBuffer.readUnsignedInt24();
|
||||
if ((nextLink & FormatSpec.MSB24) != 0) {
|
||||
nextLink = -(nextLink & FormatSpec.SINT24_MAX);
|
||||
}
|
||||
|
@ -475,9 +480,9 @@ public final class DynamicBinaryDictIOUtils {
|
|||
*/
|
||||
|
||||
// change the forward link address.
|
||||
final int newNodeAddress = buffer.limit();
|
||||
buffer.position(linkAddressPosition);
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(buffer, newNodeAddress);
|
||||
final int newNodeAddress = dictBuffer.limit();
|
||||
dictBuffer.position(linkAddressPosition);
|
||||
BinaryDictIOUtils.writeSInt24ToBuffer(dictBuffer, newNodeAddress);
|
||||
|
||||
final int[] characters = Arrays.copyOfRange(codePoints, wordPos, wordLen);
|
||||
final int flags = BinaryDictEncoder.makeCharGroupFlags(characters.length > 1,
|
||||
|
@ -490,7 +495,7 @@ public final class DynamicBinaryDictIOUtils {
|
|||
return;
|
||||
} else {
|
||||
depth--;
|
||||
buffer.position(nextLink);
|
||||
dictBuffer.position(nextLink);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
@ -24,7 +24,7 @@ import java.util.HashMap;
|
|||
/**
|
||||
* An interface to read a binary dictionary file header.
|
||||
*/
|
||||
public interface HeaderReaderInterface {
|
||||
public interface HeaderReader {
|
||||
public int readVersion() throws IOException, UnsupportedFormatException;
|
||||
public int readOptionFlags();
|
||||
public int readHeaderSize();
|
|
@ -28,7 +28,7 @@ import com.android.inputmethod.latin.ExpandableDictionary;
|
|||
import com.android.inputmethod.latin.LatinImeLogger;
|
||||
import com.android.inputmethod.latin.SuggestedWords.SuggestedWordInfo;
|
||||
import com.android.inputmethod.latin.WordComposer;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.settings.Settings;
|
||||
import com.android.inputmethod.latin.utils.CollectionUtils;
|
||||
|
@ -241,10 +241,10 @@ public abstract class DynamicPredictionDictionaryBase extends ExpandableDictiona
|
|||
};
|
||||
|
||||
// Load the dictionary from binary file
|
||||
final BinaryDictReader reader = new BinaryDictReader(
|
||||
final BinaryDictDecoder reader = new BinaryDictDecoder(
|
||||
new File(getContext().getFilesDir(), fileName));
|
||||
try {
|
||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
|
||||
reader.openDictBuffer(new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
|
||||
UserHistoryDictIOUtils.readDictionaryBinary(reader, listener);
|
||||
} catch (FileNotFoundException e) {
|
||||
// This is an expected condition: we don't have a user history dictionary for this
|
||||
|
|
|
@ -16,17 +16,17 @@
|
|||
|
||||
package com.android.inputmethod.latin.utils;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||
|
||||
/**
|
||||
* This class provides an implementation for the FusionDictionary buffer interface that is backed
|
||||
* by a simple byte array. It allows creating a binary dictionary in memory.
|
||||
*/
|
||||
public final class ByteArrayWrapper implements FusionDictionaryBufferInterface {
|
||||
public final class ByteArrayDictBuffer implements DictBuffer {
|
||||
private byte[] mBuffer;
|
||||
private int mPosition;
|
||||
|
||||
public ByteArrayWrapper(final byte[] buffer) {
|
||||
public ByteArrayDictBuffer(final byte[] buffer) {
|
||||
mBuffer = buffer;
|
||||
mPosition = 0;
|
||||
}
|
|
@ -22,7 +22,6 @@ import com.android.inputmethod.annotations.UsedForTesting;
|
|||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictIOUtils;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
|
@ -119,13 +118,13 @@ public final class UserHistoryDictIOUtils {
|
|||
/**
|
||||
* Reads dictionary from file.
|
||||
*/
|
||||
public static void readDictionaryBinary(final BinaryDictReader reader,
|
||||
public static void readDictionaryBinary(final BinaryDictDecoder dictDecoder,
|
||||
final OnAddWordListener dict) {
|
||||
final Map<Integer, String> unigrams = CollectionUtils.newTreeMap();
|
||||
final Map<Integer, Integer> frequencies = CollectionUtils.newTreeMap();
|
||||
final Map<Integer, ArrayList<PendingAttribute>> bigrams = CollectionUtils.newTreeMap();
|
||||
try {
|
||||
BinaryDictIOUtils.readUnigramsAndBigramsBinary(reader, unigrams, frequencies,
|
||||
BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, unigrams, frequencies,
|
||||
bigrams);
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IO exception while reading file", e);
|
||||
|
@ -157,7 +156,7 @@ public final class UserHistoryDictIOUtils {
|
|||
continue;
|
||||
}
|
||||
to.setBigram(word1, word2,
|
||||
BinaryDictDecoder.reconstructBigramFrequency(unigramFrequency,
|
||||
BinaryDictIOUtils.reconstructBigramFrequency(unigramFrequency,
|
||||
attr.mFrequency));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -22,7 +22,7 @@ import android.test.suitebuilder.annotation.LargeTest;
|
|||
import android.util.Log;
|
||||
import android.util.SparseArray;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
|
@ -44,7 +44,7 @@ import java.util.Random;
|
|||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* Unit tests for BinaryDictDecoder and BinaryDictEncoder.
|
||||
* Unit tests for BinaryDictDecoderUtils and BinaryDictEncoder.
|
||||
*/
|
||||
@LargeTest
|
||||
public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
||||
|
@ -118,14 +118,16 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
// Utilities for test
|
||||
|
||||
/**
|
||||
* Makes new buffer according to BUFFER_TYPE.
|
||||
* Makes new DictBuffer according to BUFFER_TYPE.
|
||||
*/
|
||||
private void getBuffer(final BinaryDictReader reader, final int bufferType)
|
||||
private void getDictBuffer(final BinaryDictDecoder dictDecoder, final int bufferType)
|
||||
throws FileNotFoundException, IOException {
|
||||
if (bufferType == USE_BYTE_BUFFER) {
|
||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
||||
dictDecoder.openDictBuffer(
|
||||
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||
} else if (bufferType == USE_BYTE_ARRAY) {
|
||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
|
||||
dictDecoder.openDictBuffer(
|
||||
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -269,14 +271,14 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
final SparseArray<List<Integer>> bigrams, final Map<String, List<String>> shortcutMap,
|
||||
final int bufferType) {
|
||||
long now, diff = -1;
|
||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||
|
||||
FusionDictionary dict = null;
|
||||
try {
|
||||
getBuffer(reader, bufferType);
|
||||
assertNotNull(reader.getBuffer());
|
||||
getDictBuffer(dictDecoder, bufferType);
|
||||
assertNotNull(dictDecoder.getDictBuffer());
|
||||
now = System.currentTimeMillis();
|
||||
dict = BinaryDictDecoder.readDictionaryBinary(reader, null);
|
||||
dict = BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null);
|
||||
diff = System.currentTimeMillis() - now;
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IOException while reading dictionary", e);
|
||||
|
@ -388,7 +390,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
}
|
||||
actBigrams.get(word1).add(word2);
|
||||
|
||||
final int bigramFreq = BinaryDictDecoder.reconstructBigramFrequency(
|
||||
final int bigramFreq = BinaryDictIOUtils.reconstructBigramFrequency(
|
||||
unigramFreq, attr.mFrequency);
|
||||
assertTrue(Math.abs(bigramFreq - BIGRAM_FREQ) < TOLERANCE_OF_BIGRAM_FREQ);
|
||||
}
|
||||
|
@ -407,12 +409,12 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
final Map<Integer, Integer> resultFreqs = CollectionUtils.newTreeMap();
|
||||
|
||||
long now = -1, diff = -1;
|
||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||
try {
|
||||
getBuffer(reader, bufferType);
|
||||
assertNotNull("Can't get buffer.", reader.getBuffer());
|
||||
getDictBuffer(dictDecoder, bufferType);
|
||||
assertNotNull("Can't get buffer.", dictDecoder.getDictBuffer());
|
||||
now = System.currentTimeMillis();
|
||||
BinaryDictIOUtils.readUnigramsAndBigramsBinary(reader, resultWords, resultFreqs,
|
||||
BinaryDictIOUtils.readUnigramsAndBigramsBinary(dictDecoder, resultWords, resultFreqs,
|
||||
resultBigrams);
|
||||
diff = System.currentTimeMillis() - now;
|
||||
} catch (IOException e) {
|
||||
|
@ -497,31 +499,31 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
// Tests for getTerminalPosition
|
||||
private String getWordFromBinary(final BinaryDictReader dictReader, final int address) {
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
||||
if (buffer.position() != 0) buffer.position(0);
|
||||
private String getWordFromBinary(final BinaryDictDecoder dictDecoder, final int address) {
|
||||
final DictBuffer dictBuffer = dictDecoder.getDictBuffer();
|
||||
if (dictBuffer.position() != 0) dictBuffer.position(0);
|
||||
|
||||
FileHeader fileHeader = null;
|
||||
try {
|
||||
fileHeader = BinaryDictDecoder.readHeader(dictReader);
|
||||
fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||
} catch (IOException e) {
|
||||
return null;
|
||||
} catch (UnsupportedFormatException e) {
|
||||
return null;
|
||||
}
|
||||
if (fileHeader == null) return null;
|
||||
return BinaryDictDecoder.getWordAtAddress(buffer, fileHeader.mHeaderSize,
|
||||
return BinaryDictDecoderUtils.getWordAtAddress(dictBuffer, fileHeader.mHeaderSize,
|
||||
address - fileHeader.mHeaderSize, fileHeader.mFormatOptions).mWord;
|
||||
}
|
||||
|
||||
private long runGetTerminalPosition(final BinaryDictReader reader, final String word, int index,
|
||||
boolean contained) {
|
||||
private long runGetTerminalPosition(final BinaryDictDecoder dictDecoder, final String word,
|
||||
int index, boolean contained) {
|
||||
final int expectedFrequency = (UNIGRAM_FREQ + index) % 255;
|
||||
long diff = -1;
|
||||
int position = -1;
|
||||
try {
|
||||
final long now = System.nanoTime();
|
||||
position = BinaryDictIOUtils.getTerminalPosition(reader, word);
|
||||
position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
|
||||
diff = System.nanoTime() - now;
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IOException while getTerminalPosition", e);
|
||||
|
@ -530,7 +532,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD != position, contained);
|
||||
if (contained) assertEquals(getWordFromBinary(reader, position), word);
|
||||
if (contained) assertEquals(getWordFromBinary(dictDecoder, position), word);
|
||||
return diff;
|
||||
}
|
||||
|
||||
|
@ -550,28 +552,29 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
||||
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
|
||||
|
||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||
try {
|
||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
|
||||
dictDecoder.openDictBuffer(
|
||||
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
Log.e(TAG, "IOException while opening the buffer", e);
|
||||
}
|
||||
assertNotNull("Can't get the buffer", reader.getBuffer());
|
||||
assertNotNull("Can't get the buffer", dictDecoder.getDictBuffer());
|
||||
|
||||
try {
|
||||
// too long word
|
||||
final String longWord = "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD,
|
||||
BinaryDictIOUtils.getTerminalPosition(reader, longWord));
|
||||
BinaryDictIOUtils.getTerminalPosition(dictDecoder, longWord));
|
||||
|
||||
// null
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD,
|
||||
BinaryDictIOUtils.getTerminalPosition(reader, null));
|
||||
BinaryDictIOUtils.getTerminalPosition(dictDecoder, null));
|
||||
|
||||
// empty string
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD,
|
||||
BinaryDictIOUtils.getTerminalPosition(reader, ""));
|
||||
BinaryDictIOUtils.getTerminalPosition(dictDecoder, ""));
|
||||
} catch (IOException e) {
|
||||
} catch (UnsupportedFormatException e) {
|
||||
}
|
||||
|
@ -579,7 +582,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
// Test a word that is contained within the dictionary.
|
||||
long sum = 0;
|
||||
for (int i = 0; i < sWords.size(); ++i) {
|
||||
final long time = runGetTerminalPosition(reader, sWords.get(i), i, true);
|
||||
final long time = runGetTerminalPosition(dictDecoder, sWords.get(i), i, true);
|
||||
sum += time == -1 ? 0 : time;
|
||||
}
|
||||
Log.d(TAG, "per a search : " + (((double)sum) / sWords.size() / 1000000));
|
||||
|
@ -590,7 +593,7 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
for (int i = 0; i < 1000; ++i) {
|
||||
final String word = generateWord(random, codePointSet);
|
||||
if (sWords.indexOf(word) != -1) continue;
|
||||
runGetTerminalPosition(reader, word, i, false);
|
||||
runGetTerminalPosition(dictDecoder, word, i, false);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -610,28 +613,28 @@ public class BinaryDictDecoderEncoderTests extends AndroidTestCase {
|
|||
addUnigrams(sWords.size(), dict, sWords, null /* shortcutMap */);
|
||||
timeWritingDictToFile(file, dict, VERSION3_WITH_DYNAMIC_UPDATE);
|
||||
|
||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||
try {
|
||||
reader.openBuffer(
|
||||
new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
|
||||
dictDecoder.openDictBuffer(
|
||||
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
|
||||
} catch (IOException e) {
|
||||
// ignore
|
||||
Log.e(TAG, "IOException while opening the buffer", e);
|
||||
}
|
||||
assertNotNull("Can't get the buffer", reader.getBuffer());
|
||||
assertNotNull("Can't get the buffer", dictDecoder.getDictBuffer());
|
||||
|
||||
try {
|
||||
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
|
||||
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0)));
|
||||
DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(0));
|
||||
BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(0)));
|
||||
DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(0));
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD,
|
||||
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(0)));
|
||||
BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(0)));
|
||||
|
||||
MoreAsserts.assertNotEqual(FormatSpec.NOT_VALID_WORD,
|
||||
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5)));
|
||||
DynamicBinaryDictIOUtils.deleteWord(reader, sWords.get(5));
|
||||
BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(5)));
|
||||
DynamicBinaryDictIOUtils.deleteWord(dictDecoder, sWords.get(5));
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD,
|
||||
BinaryDictIOUtils.getTerminalPosition(reader, sWords.get(5)));
|
||||
BinaryDictIOUtils.getTerminalPosition(dictDecoder, sWords.get(5)));
|
||||
} catch (IOException e) {
|
||||
} catch (UnsupportedFormatException e) {
|
||||
}
|
||||
|
|
|
@ -16,14 +16,14 @@
|
|||
|
||||
package com.android.inputmethod.latin.makedict;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictReader.FusionDictionaryBufferFactory;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictReader.
|
||||
FusionDictionaryBufferFromByteArrayFactory;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictReader.
|
||||
FusionDictionaryBufferFromByteBufferFactory;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictReader.
|
||||
FusionDictionaryBufferFromWritableByteBufferFactory;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.DictionaryBufferFactory;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
|
||||
DictionaryBufferFromByteArrayFactory;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
|
||||
DictionaryBufferFromReadOnlyByteBufferFactory;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
|
||||
DictionaryBufferFromWritableByteBufferFactory;
|
||||
|
||||
import android.test.AndroidTestCase;
|
||||
import android.util.Log;
|
||||
|
@ -33,10 +33,10 @@ import java.io.FileOutputStream;
|
|||
import java.io.IOException;
|
||||
|
||||
/**
|
||||
* Unit tests for BinaryDictReader
|
||||
* Unit tests for BinaryDictDecoder
|
||||
*/
|
||||
public class BinaryDictReaderTests extends AndroidTestCase {
|
||||
private static final String TAG = BinaryDictReaderTests.class.getSimpleName();
|
||||
public class BinaryDictDecoderTests extends AndroidTestCase {
|
||||
private static final String TAG = BinaryDictDecoderTests.class.getSimpleName();
|
||||
|
||||
private final byte[] data = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10 };
|
||||
|
||||
|
@ -61,7 +61,7 @@ public class BinaryDictReaderTests extends AndroidTestCase {
|
|||
|
||||
@SuppressWarnings("null")
|
||||
public void runTestOpenBuffer(final String testName,
|
||||
final FusionDictionaryBufferFactory factory) {
|
||||
final DictionaryBufferFactory factory) {
|
||||
File testFile = null;
|
||||
try {
|
||||
testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
|
||||
|
@ -70,9 +70,9 @@ public class BinaryDictReaderTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
assertNotNull(testFile);
|
||||
final BinaryDictReader reader = new BinaryDictReader(testFile);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(testFile);
|
||||
try {
|
||||
reader.openBuffer(factory);
|
||||
dictDecoder.openDictBuffer(factory);
|
||||
} catch (Exception e) {
|
||||
Log.e(TAG, "Failed to open the buffer", e);
|
||||
}
|
||||
|
@ -80,32 +80,32 @@ public class BinaryDictReaderTests extends AndroidTestCase {
|
|||
writeDataToFile(testFile);
|
||||
|
||||
try {
|
||||
reader.openBuffer(factory);
|
||||
dictDecoder.openDictBuffer(factory);
|
||||
} catch (Exception e) {
|
||||
Log.e(TAG, "Raised the exception while opening buffer", e);
|
||||
}
|
||||
|
||||
assertEquals(testFile.length(), reader.getBuffer().capacity());
|
||||
assertEquals(testFile.length(), dictDecoder.getDictBuffer().capacity());
|
||||
}
|
||||
|
||||
public void testOpenBufferWithByteBuffer() {
|
||||
runTestOpenBuffer("testOpenBufferWithByteBuffer",
|
||||
new FusionDictionaryBufferFromByteBufferFactory());
|
||||
new DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||
}
|
||||
|
||||
public void testOpenBufferWithByteArray() {
|
||||
runTestOpenBuffer("testOpenBufferWithByteArray",
|
||||
new FusionDictionaryBufferFromByteArrayFactory());
|
||||
new DictionaryBufferFromByteArrayFactory());
|
||||
}
|
||||
|
||||
public void testOpenBufferWithWritableByteBuffer() {
|
||||
runTestOpenBuffer("testOpenBufferWithWritableByteBuffer",
|
||||
new FusionDictionaryBufferFromWritableByteBufferFactory());
|
||||
new DictionaryBufferFromWritableByteBufferFactory());
|
||||
}
|
||||
|
||||
@SuppressWarnings("null")
|
||||
public void runTestGetBuffer(final String testName,
|
||||
final FusionDictionaryBufferFactory factory) {
|
||||
final DictionaryBufferFactory factory) {
|
||||
File testFile = null;
|
||||
try {
|
||||
testFile = File.createTempFile(testName, ".tmp", getContext().getCacheDir());
|
||||
|
@ -113,40 +113,41 @@ public class BinaryDictReaderTests extends AndroidTestCase {
|
|||
Log.e(TAG, "IOException while the creating temporary file", e);
|
||||
}
|
||||
|
||||
final BinaryDictReader reader = new BinaryDictReader(testFile);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(testFile);
|
||||
|
||||
// the default return value of getBuffer() must be null.
|
||||
assertNull("the default return value of getBuffer() is not null", reader.getBuffer());
|
||||
assertNull("the default return value of getBuffer() is not null",
|
||||
dictDecoder.getDictBuffer());
|
||||
|
||||
writeDataToFile(testFile);
|
||||
assertTrue(testFile.exists());
|
||||
Log.d(TAG, "file length = " + testFile.length());
|
||||
|
||||
FusionDictionaryBufferInterface buffer = null;
|
||||
DictBuffer dictBuffer = null;
|
||||
try {
|
||||
buffer = reader.openAndGetBuffer(factory);
|
||||
dictBuffer = dictDecoder.openAndGetDictBuffer(factory);
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "Failed to open and get the buffer", e);
|
||||
}
|
||||
assertNotNull("the buffer must not be null", buffer);
|
||||
assertNotNull("the buffer must not be null", dictBuffer);
|
||||
|
||||
for (int i = 0; i < data.length; ++i) {
|
||||
assertEquals(data[i], buffer.readUnsignedByte());
|
||||
assertEquals(data[i], dictBuffer.readUnsignedByte());
|
||||
}
|
||||
}
|
||||
|
||||
public void testGetBufferWithByteBuffer() {
|
||||
runTestGetBuffer("testGetBufferWithByteBuffer",
|
||||
new FusionDictionaryBufferFromByteBufferFactory());
|
||||
new DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||
}
|
||||
|
||||
public void testGetBufferWithByteArray() {
|
||||
runTestGetBuffer("testGetBufferWithByteArray",
|
||||
new FusionDictionaryBufferFromByteArrayFactory());
|
||||
new DictionaryBufferFromByteArrayFactory());
|
||||
}
|
||||
|
||||
public void testGetBufferWithWritableByteBuffer() {
|
||||
runTestGetBuffer("testGetBufferWithWritableByteBuffer",
|
||||
new FusionDictionaryBufferFromWritableByteBufferFactory());
|
||||
new DictionaryBufferFromWritableByteBufferFactory());
|
||||
}
|
||||
}
|
|
@ -21,9 +21,9 @@ import android.test.MoreAsserts;
|
|||
import android.test.suitebuilder.annotation.LargeTest;
|
||||
import android.util.Log;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.FusionDictionaryBufferInterface;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictReader.
|
||||
FusionDictionaryBufferFromWritableByteBufferFactory;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils.DictBuffer;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder.
|
||||
DictionaryBufferFromWritableByteBufferFactory;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.PtNodeArray;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||
|
@ -112,26 +112,26 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
Log.d(TAG, " end address = " + info.mEndAddress);
|
||||
}
|
||||
|
||||
private static void printNode(final FusionDictionaryBufferInterface buffer,
|
||||
private static void printNode(final DictBuffer dictBuffer,
|
||||
final FormatSpec.FormatOptions formatOptions) {
|
||||
Log.d(TAG, "Node at " + buffer.position());
|
||||
final int count = BinaryDictDecoder.readCharGroupCount(buffer);
|
||||
Log.d(TAG, "Node at " + dictBuffer.position());
|
||||
final int count = BinaryDictDecoderUtils.readCharGroupCount(dictBuffer);
|
||||
Log.d(TAG, " charGroupCount = " + count);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
final CharGroupInfo currentInfo = BinaryDictDecoder.readCharGroup(buffer,
|
||||
buffer.position(), formatOptions);
|
||||
final CharGroupInfo currentInfo = BinaryDictDecoderUtils.readCharGroup(dictBuffer,
|
||||
dictBuffer.position(), formatOptions);
|
||||
printCharGroup(currentInfo);
|
||||
}
|
||||
if (formatOptions.mSupportsDynamicUpdate) {
|
||||
final int forwardLinkAddress = buffer.readUnsignedInt24();
|
||||
final int forwardLinkAddress = dictBuffer.readUnsignedInt24();
|
||||
Log.d(TAG, " forwardLinkAddress = " + forwardLinkAddress);
|
||||
}
|
||||
}
|
||||
|
||||
private static void printBinaryFile(final BinaryDictReader dictReader)
|
||||
private static void printBinaryFile(final BinaryDictDecoder dictDecoder)
|
||||
throws IOException, UnsupportedFormatException {
|
||||
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.getBuffer();
|
||||
final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||
final DictBuffer buffer = dictDecoder.getDictBuffer();
|
||||
while (buffer.position() < buffer.limit()) {
|
||||
printNode(buffer, fileHeader.mFormatOptions);
|
||||
}
|
||||
|
@ -139,13 +139,13 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
|
||||
private int getWordPosition(final File file, final String word) {
|
||||
int position = FormatSpec.NOT_VALID_WORD;
|
||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||
FileInputStream inStream = null;
|
||||
try {
|
||||
inStream = new FileInputStream(file);
|
||||
dictReader.openBuffer(
|
||||
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
||||
position = BinaryDictIOUtils.getTerminalPosition(dictReader, word);
|
||||
dictDecoder.openDictBuffer(
|
||||
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||
position = BinaryDictIOUtils.getTerminalPosition(dictDecoder, word);
|
||||
} catch (IOException e) {
|
||||
} catch (UnsupportedFormatException e) {
|
||||
} finally {
|
||||
|
@ -161,12 +161,12 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
private CharGroupInfo findWordFromFile(final File file, final String word) {
|
||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||
CharGroupInfo info = null;
|
||||
try {
|
||||
dictReader.openBuffer(
|
||||
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
||||
info = BinaryDictIOUtils.findWordByBinaryDictReader(dictReader, word);
|
||||
dictDecoder.openDictBuffer(
|
||||
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||
info = BinaryDictIOUtils.findWordByBinaryDictReader(dictDecoder, word);
|
||||
} catch (IOException e) {
|
||||
} catch (UnsupportedFormatException e) {
|
||||
}
|
||||
|
@ -177,18 +177,18 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
private long insertAndCheckWord(final File file, final String word, final int frequency,
|
||||
final boolean exist, final ArrayList<WeightedString> bigrams,
|
||||
final ArrayList<WeightedString> shortcuts) {
|
||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||
BufferedOutputStream outStream = null;
|
||||
long amountOfTime = -1;
|
||||
try {
|
||||
dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
|
||||
dictDecoder.openDictBuffer(new DictionaryBufferFromWritableByteBufferFactory());
|
||||
outStream = new BufferedOutputStream(new FileOutputStream(file, true));
|
||||
|
||||
if (!exist) {
|
||||
assertEquals(FormatSpec.NOT_VALID_WORD, getWordPosition(file, word));
|
||||
}
|
||||
final long now = System.nanoTime();
|
||||
DynamicBinaryDictIOUtils.insertWord(dictReader, outStream, word, frequency, bigrams,
|
||||
DynamicBinaryDictIOUtils.insertWord(dictDecoder, outStream, word, frequency, bigrams,
|
||||
shortcuts, false, false);
|
||||
amountOfTime = System.nanoTime() - now;
|
||||
outStream.flush();
|
||||
|
@ -211,23 +211,23 @@ public class BinaryDictIOUtilsTests extends AndroidTestCase {
|
|||
}
|
||||
|
||||
private void deleteWord(final File file, final String word) {
|
||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||
try {
|
||||
dictReader.openBuffer(new FusionDictionaryBufferFromWritableByteBufferFactory());
|
||||
DynamicBinaryDictIOUtils.deleteWord(dictReader, word);
|
||||
dictDecoder.openDictBuffer(new DictionaryBufferFromWritableByteBufferFactory());
|
||||
DynamicBinaryDictIOUtils.deleteWord(dictDecoder, word);
|
||||
} catch (IOException e) {
|
||||
} catch (UnsupportedFormatException e) {
|
||||
}
|
||||
}
|
||||
|
||||
private void checkReverseLookup(final File file, final String word, final int position) {
|
||||
final BinaryDictReader dictReader = new BinaryDictReader(file);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||
try {
|
||||
final FusionDictionaryBufferInterface buffer = dictReader.openAndGetBuffer(
|
||||
new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
||||
final FileHeader fileHeader = BinaryDictDecoder.readHeader(dictReader);
|
||||
final DictBuffer dictBuffer = dictDecoder.openAndGetDictBuffer(
|
||||
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||
final FileHeader fileHeader = BinaryDictDecoderUtils.readHeader(dictDecoder);
|
||||
assertEquals(word,
|
||||
BinaryDictDecoder.getWordAtAddress(dictReader.getBuffer(),
|
||||
BinaryDictDecoderUtils.getWordAtAddress(dictDecoder.getDictBuffer(),
|
||||
fileHeader.mHeaderSize, position - fileHeader.mHeaderSize,
|
||||
fileHeader.mFormatOptions).mWord);
|
||||
} catch (IOException e) {
|
||||
|
|
|
@ -21,7 +21,7 @@ import android.test.AndroidTestCase;
|
|||
import android.test.suitebuilder.annotation.LargeTest;
|
||||
import android.util.Log;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||
|
@ -147,15 +147,16 @@ public class UserHistoryDictIOUtilsTests extends AndroidTestCase
|
|||
}
|
||||
|
||||
private void readDictFromFile(final File file, final OnAddWordListener listener) {
|
||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||
try {
|
||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
|
||||
dictDecoder.openDictBuffer(
|
||||
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
|
||||
} catch (FileNotFoundException e) {
|
||||
Log.e(TAG, "file not found", e);
|
||||
} catch (IOException e) {
|
||||
Log.e(TAG, "IOException", e);
|
||||
}
|
||||
UserHistoryDictIOUtils.readDictionaryBinary(reader, listener);
|
||||
UserHistoryDictIOUtils.readDictionaryBinary(dictDecoder, listener);
|
||||
}
|
||||
|
||||
public void testGenerateFusionDictionary() {
|
||||
|
|
|
@ -28,7 +28,7 @@ LATINIME_ANNOTATIONS_SOURCE_DIRECTORY := $(LATINIME_BASE_SOURCE_DIRECTORY)/annot
|
|||
LATINIME_CORE_SOURCE_DIRECTORY := $(LATINIME_BASE_SOURCE_DIRECTORY)/latin
|
||||
MAKEDICT_CORE_SOURCE_DIRECTORY := $(LATINIME_CORE_SOURCE_DIRECTORY)/makedict
|
||||
USED_TARGETTED_UTILS := \
|
||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayWrapper.java \
|
||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/ByteArrayDictBuffer.java \
|
||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/CollectionUtils.java \
|
||||
$(LATINIME_CORE_SOURCE_DIRECTORY)/utils/JniUtils.java
|
||||
|
||||
|
|
|
@ -16,8 +16,8 @@
|
|||
|
||||
package com.android.inputmethod.latin.dicttool;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.UnsupportedFormatException;
|
||||
|
||||
|
@ -97,7 +97,7 @@ public final class BinaryDictOffdeviceUtils {
|
|||
// over and over, ending in a stack overflow. Hence we limit the depth at which we try
|
||||
// decoding the file.
|
||||
if (depth > MAX_DECODE_DEPTH) return null;
|
||||
if (BinaryDictDecoder.isBinaryDictionary(src)) {
|
||||
if (BinaryDictDecoderUtils.isBinaryDictionary(src)) {
|
||||
spec.mFile = src;
|
||||
return spec;
|
||||
}
|
||||
|
@ -184,15 +184,15 @@ public final class BinaryDictOffdeviceUtils {
|
|||
crash(filename, new RuntimeException(
|
||||
filename + " does not seem to be a dictionary file"));
|
||||
} else {
|
||||
final BinaryDictReader reader = new BinaryDictReader(decodedSpec.mFile);
|
||||
reader.openBuffer(
|
||||
new BinaryDictReader.FusionDictionaryBufferFromByteArrayFactory());
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(decodedSpec.mFile);
|
||||
dictDecoder.openDictBuffer(
|
||||
new BinaryDictDecoder.DictionaryBufferFromByteArrayFactory());
|
||||
if (report) {
|
||||
System.out.println("Format : Binary dictionary format");
|
||||
System.out.println("Packaging : " + decodedSpec.describeChain());
|
||||
System.out.println("Uncompressed size : " + decodedSpec.mFile.length());
|
||||
}
|
||||
return BinaryDictDecoder.readDictionaryBinary(reader, null);
|
||||
return BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null);
|
||||
}
|
||||
}
|
||||
} catch (IOException e) {
|
||||
|
|
|
@ -16,9 +16,9 @@
|
|||
|
||||
package com.android.inputmethod.latin.dicttool;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.MakedictLog;
|
||||
|
@ -176,7 +176,7 @@ public class DictionaryMaker {
|
|||
inputUnigramXml = filename;
|
||||
} else if (CombinedInputOutput.isCombinedDictionary(filename)) {
|
||||
inputCombined = filename;
|
||||
} else if (BinaryDictDecoder.isBinaryDictionary(filename)) {
|
||||
} else if (BinaryDictDecoderUtils.isBinaryDictionary(filename)) {
|
||||
inputBinary = filename;
|
||||
} else {
|
||||
throw new IllegalArgumentException(
|
||||
|
@ -198,7 +198,7 @@ public class DictionaryMaker {
|
|||
}
|
||||
} else {
|
||||
if (null == inputBinary && null == inputUnigramXml) {
|
||||
if (BinaryDictDecoder.isBinaryDictionary(arg)) {
|
||||
if (BinaryDictDecoderUtils.isBinaryDictionary(arg)) {
|
||||
inputBinary = arg;
|
||||
} else if (CombinedInputOutput.isCombinedDictionary(arg)) {
|
||||
inputCombined = arg;
|
||||
|
@ -266,9 +266,10 @@ public class DictionaryMaker {
|
|||
private static FusionDictionary readBinaryFile(final String binaryFilename)
|
||||
throws FileNotFoundException, IOException, UnsupportedFormatException {
|
||||
final File file = new File(binaryFilename);
|
||||
final BinaryDictReader reader = new BinaryDictReader(file);
|
||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
||||
return BinaryDictDecoder.readDictionaryBinary(reader, null);
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(file);
|
||||
dictDecoder.openDictBuffer(
|
||||
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||
return BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder, null);
|
||||
}
|
||||
|
||||
/**
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
package com.android.inputmethod.latin.dicttool;
|
||||
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoder;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictDecoderUtils;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictEncoder;
|
||||
import com.android.inputmethod.latin.makedict.BinaryDictReader;
|
||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary;
|
||||
import com.android.inputmethod.latin.makedict.FusionDictionary.DictionaryOptions;
|
||||
|
@ -67,9 +67,10 @@ public class BinaryDictOffdeviceUtilsTests extends TestCase {
|
|||
assertEquals("Wrong decode spec", BinaryDictOffdeviceUtils.COMPRESSION, step);
|
||||
}
|
||||
assertEquals("Wrong decode spec", 3, decodeSpec.mDecoderSpec.size());
|
||||
final BinaryDictReader reader = new BinaryDictReader(decodeSpec.mFile);
|
||||
reader.openBuffer(new BinaryDictReader.FusionDictionaryBufferFromByteBufferFactory());
|
||||
final FusionDictionary resultDict = BinaryDictDecoder.readDictionaryBinary(reader,
|
||||
final BinaryDictDecoder dictDecoder = new BinaryDictDecoder(decodeSpec.mFile);
|
||||
dictDecoder.openDictBuffer(
|
||||
new BinaryDictDecoder.DictionaryBufferFromReadOnlyByteBufferFactory());
|
||||
final FusionDictionary resultDict = BinaryDictDecoderUtils.readDictionaryBinary(dictDecoder,
|
||||
null /* dict : an optional dictionary to add words to, or null */);
|
||||
assertEquals("Dictionary can't be read back correctly",
|
||||
FusionDictionary.findWordInTree(resultDict.mRootNodeArray, "foo").getFrequency(),
|
||||
|
|
Loading…
Reference in a new issue