Add read support for string shortcuts for makedict.

Change-Id: I48ee4fc9ac703ad2a680b3cd848de91c415ea3c8
This commit is contained in:
Jean Chalard 2012-03-27 17:59:30 +09:00
parent 3bbb31f3f0
commit 752996540f
2 changed files with 26 additions and 35 deletions

View file

@ -268,6 +268,19 @@ public class BinaryDictInputOutput {
return index - origin; return index - origin;
} }
/**
 * Reads a string from a RandomAccessFile. This is the converse of the method declared
 * immediately above.
 *
 * Code points are pulled one at a time with readChar() and accumulated until readChar()
 * returns INVALID_CHARACTER. That terminating value is consumed from the file but is not
 * part of the returned string.
 *
 * @param source the file to read from, positioned at the first character of the string.
 * @return the decoded string; empty if the very first value read is INVALID_CHARACTER.
 * @throws IOException propagated from readChar().
 */
private static String readString(final RandomAccessFile source) throws IOException {
    final StringBuilder decoded = new StringBuilder();
    // Each iteration consumes exactly one character from the file; the loop stops on the
    // terminator, after it has been read.
    for (int codePoint = readChar(source); INVALID_CHARACTER != codePoint;
            codePoint = readChar(source)) {
        decoded.appendCodePoint(codePoint);
    }
    return decoded.toString();
}
/** /**
* Reads a character from the file. * Reads a character from the file.
* *
@ -995,36 +1008,19 @@ public class BinaryDictInputOutput {
childrenAddress = NO_CHILDREN_ADDRESS; childrenAddress = NO_CHILDREN_ADDRESS;
break; break;
} }
ArrayList<PendingAttribute> shortcutTargets = null; ArrayList<WeightedString> shortcutTargets = null;
if (0 != (flags & FLAG_HAS_SHORTCUT_TARGETS)) { if (0 != (flags & FLAG_HAS_SHORTCUT_TARGETS)) {
shortcutTargets = new ArrayList<PendingAttribute>(); final long pointerBefore = source.getFilePointer();
shortcutTargets = new ArrayList<WeightedString>();
source.readUnsignedShort(); // Skip the size
while (true) { while (true) {
final int targetFlags = source.readUnsignedByte(); final int targetFlags = source.readUnsignedByte();
++addressPointer; final String word = CharEncoding.readString(source);
final int sign = 0 == (targetFlags & FLAG_ATTRIBUTE_OFFSET_NEGATIVE) ? 1 : -1; shortcutTargets.add(new WeightedString(word,
int targetAddress = addressPointer; targetFlags & FLAG_ATTRIBUTE_FREQUENCY));
switch (targetFlags & MASK_ATTRIBUTE_ADDRESS_TYPE) {
case FLAG_ATTRIBUTE_ADDRESS_TYPE_ONEBYTE:
targetAddress += sign * source.readUnsignedByte();
addressPointer += 1;
break;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_TWOBYTES:
targetAddress += sign * source.readUnsignedShort();
addressPointer += 2;
break;
case FLAG_ATTRIBUTE_ADDRESS_TYPE_THREEBYTES:
final int offset = ((source.readUnsignedByte() << 16)
+ source.readUnsignedShort());
targetAddress += sign * offset;
addressPointer += 3;
break;
default:
throw new RuntimeException("Has shortcut targets with no address");
}
shortcutTargets.add(new PendingAttribute(targetFlags & FLAG_ATTRIBUTE_FREQUENCY,
targetAddress));
if (0 == (targetFlags & FLAG_ATTRIBUTE_HAS_NEXT)) break; if (0 == (targetFlags & FLAG_ATTRIBUTE_HAS_NEXT)) break;
} }
addressPointer += (source.getFilePointer() - pointerBefore);
} }
ArrayList<PendingAttribute> bigrams = null; ArrayList<PendingAttribute> bigrams = null;
if (0 != (flags & FLAG_HAS_BIGRAMS)) { if (0 != (flags & FLAG_HAS_BIGRAMS)) {
@ -1149,14 +1145,7 @@ public class BinaryDictInputOutput {
int groupOffset = nodeOrigin + getGroupCountSize(count); int groupOffset = nodeOrigin + getGroupCountSize(count);
for (int i = count; i > 0; --i) { for (int i = count; i > 0; --i) {
CharGroupInfo info = readCharGroup(source, groupOffset); CharGroupInfo info = readCharGroup(source, groupOffset);
ArrayList<WeightedString> shortcutTargets = null; ArrayList<WeightedString> shortcutTargets = info.mShortcutTargets;
if (null != info.mShortcutTargets) {
shortcutTargets = new ArrayList<WeightedString>();
for (PendingAttribute target : info.mShortcutTargets) {
final String word = getWordAtAddress(source, headerSize, target.mAddress);
shortcutTargets.add(new WeightedString(word, target.mFrequency));
}
}
ArrayList<WeightedString> bigrams = null; ArrayList<WeightedString> bigrams = null;
if (null != info.mBigrams) { if (null != info.mBigrams) {
bigrams = new ArrayList<WeightedString>(); bigrams = new ArrayList<WeightedString>();

View file

@ -16,6 +16,8 @@
package com.android.inputmethod.latin.makedict; package com.android.inputmethod.latin.makedict;
import com.android.inputmethod.latin.makedict.FusionDictionary.WeightedString;
import java.util.ArrayList; import java.util.ArrayList;
/** /**
@ -29,12 +31,12 @@ public class CharGroupInfo {
public final int[] mCharacters; public final int[] mCharacters;
public final int mFrequency; public final int mFrequency;
public final int mChildrenAddress; public final int mChildrenAddress;
public final ArrayList<PendingAttribute> mShortcutTargets; public final ArrayList<WeightedString> mShortcutTargets;
public final ArrayList<PendingAttribute> mBigrams; public final ArrayList<PendingAttribute> mBigrams;
public CharGroupInfo(final int originalAddress, final int endAddress, final int flags, public CharGroupInfo(final int originalAddress, final int endAddress, final int flags,
final int[] characters, final int frequency, final int childrenAddress, final int[] characters, final int frequency, final int childrenAddress,
final ArrayList<PendingAttribute> shortcutTargets, final ArrayList<WeightedString> shortcutTargets,
final ArrayList<PendingAttribute> bigrams) { final ArrayList<PendingAttribute> bigrams) {
mOriginalAddress = originalAddress; mOriginalAddress = originalAddress;
mEndAddress = endAddress; mEndAddress = endAddress;