Merge "Add writeCharGroup."
commit
00e1d421c2
|
@ -22,9 +22,13 @@ import com.android.inputmethod.latin.makedict.BinaryDictInputOutput.FusionDictio
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FileHeader;
|
||||||
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
import com.android.inputmethod.latin.makedict.FormatSpec.FormatOptions;
|
||||||
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
import com.android.inputmethod.latin.makedict.FusionDictionary.CharGroup;
|
||||||
|
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
|
||||||
|
|
||||||
import java.io.IOException;
|
import java.io.IOException;
|
||||||
|
import java.io.OutputStream;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
|
import java.util.Iterator;
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Stack;
|
import java.util.Stack;
|
||||||
|
|
||||||
|
@ -251,7 +255,7 @@ public final class BinaryDictIOUtils {
|
||||||
buffer.put((byte)newFlags);
|
buffer.put((byte)newFlags);
|
||||||
}
|
}
|
||||||
|
|
||||||
private static void putSInt24(final FusionDictionaryBufferInterface buffer,
|
private static void writeSInt24ToBuffer(final FusionDictionaryBufferInterface buffer,
|
||||||
final int value) {
|
final int value) {
|
||||||
final int absValue = Math.abs(value);
|
final int absValue = Math.abs(value);
|
||||||
buffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
|
buffer.put((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
|
||||||
|
@ -259,6 +263,32 @@ public final class BinaryDictIOUtils {
|
||||||
buffer.put((byte)(absValue & 0xFF));
|
buffer.put((byte)(absValue & 0xFF));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void writeSInt24ToStream(final OutputStream destination, final int value)
|
||||||
|
throws IOException {
|
||||||
|
final int absValue = Math.abs(value);
|
||||||
|
destination.write((byte)(((value < 0 ? 0x80 : 0) | (absValue >> 16)) & 0xFF));
|
||||||
|
destination.write((byte)((absValue >> 8) & 0xFF));
|
||||||
|
destination.write((byte)(absValue & 0xFF));
|
||||||
|
}
|
||||||
|
|
||||||
|
private static void writeVariableAddress(final OutputStream destination, final int value)
|
||||||
|
throws IOException {
|
||||||
|
switch (BinaryDictInputOutput.getByteSize(value)) {
|
||||||
|
case 1:
|
||||||
|
destination.write((byte)value);
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
destination.write((byte)(0xFF & (value >> 8)));
|
||||||
|
destination.write((byte)(0xFF & value));
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
destination.write((byte)(0xFF & (value >> 16)));
|
||||||
|
destination.write((byte)(0xFF & (value >> 8)));
|
||||||
|
destination.write((byte)(0xFF & value));
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update a parent address in a CharGroup that is addressed by groupOriginAddress.
|
* Update a parent address in a CharGroup that is addressed by groupOriginAddress.
|
||||||
*
|
*
|
||||||
|
@ -277,7 +307,7 @@ public final class BinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
final int flags = buffer.readUnsignedByte();
|
final int flags = buffer.readUnsignedByte();
|
||||||
final int parentOffset = newParentAddress - groupOriginAddress;
|
final int parentOffset = newParentAddress - groupOriginAddress;
|
||||||
putSInt24(buffer, parentOffset);
|
writeSInt24ToBuffer(buffer, parentOffset);
|
||||||
buffer.position(originalPosition);
|
buffer.position(originalPosition);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -293,6 +323,22 @@ public final class BinaryDictIOUtils {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private static void writeString(final OutputStream destination, final String word)
|
||||||
|
throws IOException {
|
||||||
|
final int length = word.length();
|
||||||
|
for (int i = 0; i < length; i = word.offsetByCodePoints(i, 1)) {
|
||||||
|
final int codePoint = word.codePointAt(i);
|
||||||
|
if (CharEncoding.getCharSize(codePoint) == 1) {
|
||||||
|
destination.write((byte)codePoint);
|
||||||
|
} else {
|
||||||
|
destination.write((byte)(0xFF & (codePoint >> 16)));
|
||||||
|
destination.write((byte)(0xFF & (codePoint >> 8)));
|
||||||
|
destination.write((byte)(0xFF & codePoint));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
destination.write((byte)FormatSpec.GROUP_CHARACTERS_TERMINATOR);
|
||||||
|
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Update a children address in a CharGroup that is addressed by groupOriginAddress.
|
* Update a children address in a CharGroup that is addressed by groupOriginAddress.
|
||||||
*
|
*
|
||||||
|
@ -312,7 +358,82 @@ public final class BinaryDictIOUtils {
|
||||||
if ((FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte();
|
if ((FormatSpec.FLAG_IS_TERMINAL) != 0) buffer.readUnsignedByte();
|
||||||
final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS
|
final int childrenOffset = newChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS
|
||||||
? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - buffer.position();
|
? FormatSpec.NO_CHILDREN_ADDRESS : newChildrenAddress - buffer.position();
|
||||||
putSInt24(buffer, childrenOffset);
|
writeSInt24ToBuffer(buffer, childrenOffset);
|
||||||
buffer.position(originalPosition);
|
buffer.position(originalPosition);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Write a char group to an output stream.
|
||||||
|
* A char group is an in-memory representation of a node in trie.
|
||||||
|
* A char group info is an on-disk representation of a node.
|
||||||
|
*
|
||||||
|
* @param destination the stream to write.
|
||||||
|
* @param info the char group info to be written.
|
||||||
|
*/
|
||||||
|
public static void writeCharGroup(final OutputStream destination, final CharGroupInfo info)
|
||||||
|
throws IOException {
|
||||||
|
destination.write((byte)info.mFlags);
|
||||||
|
final int parentOffset = info.mParentAddress == FormatSpec.NO_PARENT_ADDRESS ?
|
||||||
|
FormatSpec.NO_PARENT_ADDRESS : info.mParentAddress - info.mOriginalAddress;
|
||||||
|
writeSInt24ToStream(destination, parentOffset);
|
||||||
|
|
||||||
|
for (int i = 0; i < info.mCharacters.length; ++i) {
|
||||||
|
if (CharEncoding.getCharSize(info.mCharacters[i]) == 1) {
|
||||||
|
destination.write((byte)info.mCharacters[i]);
|
||||||
|
} else {
|
||||||
|
writeSInt24ToStream(destination, info.mCharacters[i]);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (info.mCharacters.length > 1) {
|
||||||
|
destination.write((byte)FormatSpec.GROUP_CHARACTERS_TERMINATOR);
|
||||||
|
}
|
||||||
|
|
||||||
|
if ((info.mFlags & FormatSpec.FLAG_IS_TERMINAL) != 0) {
|
||||||
|
destination.write((byte)info.mFrequency);
|
||||||
|
}
|
||||||
|
|
||||||
|
final int childrenOffset = info.mChildrenAddress == FormatSpec.NO_CHILDREN_ADDRESS ?
|
||||||
|
0 : info.mChildrenAddress - info.mOriginalAddress;
|
||||||
|
writeSInt24ToStream(destination, childrenOffset);
|
||||||
|
|
||||||
|
if (info.mShortcutTargets != null && info.mShortcutTargets.size() > 0) {
|
||||||
|
final int shortcutListSize =
|
||||||
|
BinaryDictInputOutput.getShortcutListSize(info.mShortcutTargets);
|
||||||
|
destination.write((byte)(shortcutListSize >> 8));
|
||||||
|
destination.write((byte)(shortcutListSize & 0xFF));
|
||||||
|
final Iterator<WeightedString> shortcutIterator = info.mShortcutTargets.iterator();
|
||||||
|
while (shortcutIterator.hasNext()) {
|
||||||
|
final WeightedString target = shortcutIterator.next();
|
||||||
|
destination.write((byte)BinaryDictInputOutput.makeShortcutFlags(
|
||||||
|
shortcutIterator.hasNext(), target.mFrequency));
|
||||||
|
writeString(destination, target.mWord);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (info.mBigrams != null) {
|
||||||
|
// TODO: Consolidate this code with the code that computes the size of the bigram list
|
||||||
|
// in BinaryDictionaryInputOutput#computeActualNodeSize
|
||||||
|
for (int i = 0; i < info.mBigrams.size(); ++i) {
|
||||||
|
final int bigramOffset = info.mBigrams.get(i).mAddress - info.mOriginalAddress;
|
||||||
|
final int bigramFrequency = info.mBigrams.get(i).mFrequency;
|
||||||
|
int bigramFlags = (i < info.mBigrams.size() - 1)
|
||||||
|
? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0;
|
||||||
|
bigramFlags |= (bigramOffset < 0) ? FormatSpec.FLAG_ATTRIBUTE_OFFSET_NEGATIVE : 0;
|
||||||
|
switch (BinaryDictInputOutput.getByteSize(bigramOffset)) {
|
||||||
|
case 1:
|
||||||
|
bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE;
|
||||||
|
break;
|
||||||
|
case 2:
|
||||||
|
bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES;
|
||||||
|
break;
|
||||||
|
case 3:
|
||||||
|
bigramFlags |= FormatSpec.FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
bigramFlags |= bigramFrequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY;
|
||||||
|
destination.write((byte)bigramFlags);
|
||||||
|
writeVariableAddress(destination, Math.abs(bigramOffset));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -153,7 +153,7 @@ public final class BinaryDictInputOutput {
|
||||||
* @param character the character code.
|
* @param character the character code.
|
||||||
* @return the size in binary encoded-form, either 1 or 3 bytes.
|
* @return the size in binary encoded-form, either 1 or 3 bytes.
|
||||||
*/
|
*/
|
||||||
private static int getCharSize(final int character) {
|
static int getCharSize(final int character) {
|
||||||
// See char encoding in FusionDictionary.java
|
// See char encoding in FusionDictionary.java
|
||||||
if (fitsOnOneByte(character)) return 1;
|
if (fitsOnOneByte(character)) return 1;
|
||||||
if (FormatSpec.INVALID_CHARACTER == character) return 1;
|
if (FormatSpec.INVALID_CHARACTER == character) return 1;
|
||||||
|
@ -337,7 +337,7 @@ public final class BinaryDictInputOutput {
|
||||||
* This is known in advance and does not change according to position in the file
|
* This is known in advance and does not change according to position in the file
|
||||||
* like address lists do.
|
* like address lists do.
|
||||||
*/
|
*/
|
||||||
private static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) {
|
static int getShortcutListSize(final ArrayList<WeightedString> shortcutList) {
|
||||||
if (null == shortcutList) return 0;
|
if (null == shortcutList) return 0;
|
||||||
int size = FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
|
int size = FormatSpec.GROUP_SHORTCUT_LIST_SIZE_SIZE;
|
||||||
for (final WeightedString shortcut : shortcutList) {
|
for (final WeightedString shortcut : shortcutList) {
|
||||||
|
@ -438,7 +438,7 @@ public final class BinaryDictInputOutput {
|
||||||
* @param address the address
|
* @param address the address
|
||||||
* @return the byte size.
|
* @return the byte size.
|
||||||
*/
|
*/
|
||||||
private static int getByteSize(final int address) {
|
static int getByteSize(final int address) {
|
||||||
assert(address <= UINT24_MAX);
|
assert(address <= UINT24_MAX);
|
||||||
if (!hasChildrenAddress(address)) {
|
if (!hasChildrenAddress(address)) {
|
||||||
return 0;
|
return 0;
|
||||||
|
@ -858,7 +858,7 @@ public final class BinaryDictInputOutput {
|
||||||
* @param frequency the frequency of the attribute, 0..15
|
* @param frequency the frequency of the attribute, 0..15
|
||||||
* @return the flags
|
* @return the flags
|
||||||
*/
|
*/
|
||||||
private static final int makeShortcutFlags(final boolean more, final int frequency) {
|
static final int makeShortcutFlags(final boolean more, final int frequency) {
|
||||||
return (more ? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0)
|
return (more ? FormatSpec.FLAG_ATTRIBUTE_HAS_NEXT : 0)
|
||||||
+ (frequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY);
|
+ (frequency & FormatSpec.FLAG_ATTRIBUTE_FREQUENCY);
|
||||||
}
|
}
|
||||||
|
@ -896,6 +896,7 @@ public final class BinaryDictInputOutput {
|
||||||
*/
|
*/
|
||||||
private static int writePlacedNode(final FusionDictionary dict, byte[] buffer,
|
private static int writePlacedNode(final FusionDictionary dict, byte[] buffer,
|
||||||
final Node node, final FormatOptions formatOptions) {
|
final Node node, final FormatOptions formatOptions) {
|
||||||
|
// TODO: Make the code in common with BinaryDictIOUtils#writeCharGroup
|
||||||
int index = node.mCachedAddress;
|
int index = node.mCachedAddress;
|
||||||
|
|
||||||
final int groupCount = node.mData.size();
|
final int groupCount = node.mData.size();
|
||||||
|
|
Loading…
Reference in New Issue