Add read support for string shortcuts for makedict.

Change-Id: I48ee4fc9ac703ad2a680b3cd848de91c415ea3c8
This commit is contained in:
Jean Chalard 2012-03-27 17:59:30 +09:00
parent 3bbb31f3f0
commit 752996540f
2 changed files with 26 additions and 35 deletions

View file

@ -268,6 +268,19 @@ public class BinaryDictInputOutput {
return index - origin;
}
/**
 * Reads a string from a RandomAccessFile. This is the converse of the above method.
 *
 * Code points are pulled one at a time through readChar() and accumulated until
 * readChar() yields INVALID_CHARACTER; that terminating value is consumed from the
 * source but is not part of the returned string.
 *
 * @param source the file to read from, positioned at the first character of the string.
 * @return the characters read before the terminator, possibly the empty string.
 * @throws IOException if reading from the file fails.
 */
private static String readString(final RandomAccessFile source) throws IOException {
    final StringBuilder builder = new StringBuilder();
    // Each pass reads one code point; the loop ends on the first INVALID_CHARACTER.
    for (int codePoint = readChar(source); INVALID_CHARACTER != codePoint;
            codePoint = readChar(source)) {
        builder.appendCodePoint(codePoint);
    }
    return builder.toString();
}
/**
* Reads a character from the file.
*
@ -995,36 +1008,19 @@ public class BinaryDictInputOutput {
childrenAddress = NO_CHILDREN_ADDRESS;
break;
}
ArrayList<PendingAttribute> shortcutTargets = null;
ArrayList<WeightedString> shortcutTargets = null;
if (0 != (flags & FLAG_HAS_SHORTCUT_TARGETS)) {
shortcutTargets = new ArrayList<PendingAttribute>();
final long pointerBefore = source.getFilePointer();
shortcutTargets = new ArrayList<WeightedString>();
source.readUnsignedShort(); // Skip the size
while (true) {
final int targetFlags = source.readUnsignedByte();
++addressPointer;
final int sign = 0 == (targetFlags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) ? 1 : -1;
int targetAddress = addressPointer;
switch (targetFlags & MASK_ATTRIBUTE_ADDRESS_TYPE) {
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
targetAddress += sign * source.readUnsignedByte();
addressPointer += 1;
break;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
targetAddress += sign * source.readUnsignedShort();
addressPointer += 2;
break;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
final int offset = ((source.readUnsignedByte() << 16)
+ source.readUnsignedShort());
targetAddress += sign * offset;
addressPointer += 3;
break;
default:
throw new RuntimeException("Has shortcut targets with no address");
}
shortcutTargets.add(new PendingAttribute(targetFlags & FLAG_ATTRIBUTE_FREQUENCY,
targetAddress));
final String word = CharEncoding.readString(source);
shortcutTargets.add(new WeightedString(word,
targetFlags & FLAG_ATTRIBUTE_FREQUENCY));
if (0 == (targetFlags & FLAG_ATTRIBUTE_HAS_NEXT)) break;
}
addressPointer += (source.getFilePointer() - pointerBefore);
}
ArrayList<PendingAttribute> bigrams = null;
if (0 != (flags & FLAG_HAS_BIGRAMS)) {
@ -1149,14 +1145,7 @@ public class BinaryDictInputOutput {
int groupOffset = nodeOrigin + getGroupCountSize(count);
for (int i = count; i > 0; --i) {
CharGroupInfo info = readCharGroup(source, groupOffset);
ArrayList<WeightedString> shortcutTargets = null;
if (null != info.mShortcutTargets) {
shortcutTargets = new ArrayList<WeightedString>();
for (PendingAttribute target : info.mShortcutTargets) {
final String word = getWordAtAddress(source, headerSize, target.mAddress);
shortcutTargets.add(new WeightedString(word, target.mFrequency));
}
}
ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
ArrayList<WeightedString> bigrams = null;
if (null != info.mBigrams) {
bigrams = new ArrayList<WeightedString>();

View file

@ -16,6 +16,8 @@
package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.util.ArrayList;
/**
@ -29,12 +31,12 @@ public class CharGroupInfo {
public final int[] mCharacters;
public final int mFrequency;
public final int mChildrenAddress;
public final ArrayList<PendingAttribute> mShortcutTargets;
public final ArrayList<WeightedString> mShortcutTargets;
public final ArrayList<PendingAttribute> mBigrams;
public CharGroupInfo(final int originalAddress, final int endAddress, final int flags,
final int[] characters, final int frequency, final int childrenAddress,
final ArrayList<PendingAttribute> shortcutTargets,
final ArrayList<WeightedString> shortcutTargets,
final ArrayList<PendingAttribute> bigrams) {
mOriginalAddress = originalAddress;
mEndAddress = endAddress;