Read the group count correctly from the binary format

...in makedict, for reverse makedict.

Change-Id: I74774b50998eb605e8091d95cebdda6ad7f5fce3
main
Jean Chalard 2012-01-16 17:37:24 +09:00
parent 67e13976b7
commit 9b8b2c757d
1 changed files with 36 additions and 14 deletions

View File

@ -264,20 +264,29 @@ public class BinaryDictInputOutput {
return size; return size;
} }
/**
 * Returns the number of bytes needed to store a group count in the binary format.
 *
 * A count no greater than MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT takes one byte;
 * a larger count no greater than MAX_CHARGROUPS_IN_A_NODE takes two bytes.
 *
 * @param count the group count
 * @return the size of the group count, either 1 or 2 bytes.
 * @throws RuntimeException if count is greater than MAX_CHARGROUPS_IN_A_NODE
 */
private static int getGroupCountSize(final int count) {
    if (count <= MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT) {
        return 1;
    }
    if (count <= MAX_CHARGROUPS_IN_A_NODE) {
        return 2;
    }
    // Anything larger cannot be represented by the one- or two-byte encoding.
    throw new RuntimeException("Can't have more than " + MAX_CHARGROUPS_IN_A_NODE
            + " groups in a node (found " + count + ")");
}
/** /**
* Compute the binary size of the group count for a node * Compute the binary size of the group count for a node
* @param node the node * @param node the node
* @return the size of the group count, either 1 or 2 bytes. * @return the size of the group count, either 1 or 2 bytes.
*/ */
private static int getGroupCountSize(final Node node) { private static int getGroupCountSize(final Node node) {
if (MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT >= node.mData.size()) { return getGroupCountSize(node.mData.size());
return 1;
} else if (MAX_CHARGROUPS_IN_A_NODE >= node.mData.size()) {
return 2;
} else {
throw new RuntimeException("Can't have more than " + MAX_CHARGROUPS_IN_A_NODE
+ " groups in a node (found " + node.mData.size() +")");
}
} }
/** /**
@ -913,7 +922,7 @@ public class BinaryDictInputOutput {
addressPointer += 3; addressPointer += 3;
break; break;
default: default:
throw new RuntimeException("Has attribute with no address"); throw new RuntimeException("Has shortcut targets with no address");
} }
shortcutTargets.add(new PendingAttribute(targetFlags & FLAG_ATTRIBUTE_FREQUENCY, shortcutTargets.add(new PendingAttribute(targetFlags & FLAG_ATTRIBUTE_FREQUENCY,
targetAddress)); targetAddress));
@ -944,7 +953,7 @@ public class BinaryDictInputOutput {
addressPointer += 3; addressPointer += 3;
break; break;
default: default:
throw new RuntimeException("Has attribute with no address"); throw new RuntimeException("Has bigrams with no address");
} }
bigrams.add(new PendingAttribute(bigramFlags & FLAG_ATTRIBUTE_FREQUENCY, bigrams.add(new PendingAttribute(bigramFlags & FLAG_ATTRIBUTE_FREQUENCY,
bigramAddress)); bigramAddress));
@ -955,6 +964,19 @@ public class BinaryDictInputOutput {
childrenAddress, shortcutTargets, bigrams); childrenAddress, shortcutTargets, bigrams);
} }
/**
 * Reads the char group count at the current position of a file and advances the
 * file pointer past it.
 *
 * One unsigned byte is read first. If its value does not exceed
 * MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT, it is the count. Otherwise the byte is
 * masked with that same constant and used as the high-order byte, and a second
 * unsigned byte is read and used as the low-order byte.
 *
 * @param source the file to read from, positioned on the group count
 * @return the char group count
 * @throws IOException if reading from the file fails
 */
private static int readCharGroupCount(RandomAccessFile source) throws IOException {
    final int firstByte = source.readUnsignedByte();
    if (firstByte <= MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT) {
        return firstByte;
    }
    // Two-byte form: the masked first byte is the high part, the next byte the low part.
    final int highPart = (firstByte & MAX_CHARGROUPS_FOR_ONE_BYTE_CHARGROUP_COUNT) << 8;
    final int lowPart = source.readUnsignedByte();
    return highPart + lowPart;
}
/** /**
* Finds, as a string, the word at the address passed as an argument. * Finds, as a string, the word at the address passed as an argument.
* *
@ -968,8 +990,8 @@ public class BinaryDictInputOutput {
int address) throws IOException { int address) throws IOException {
final long originalPointer = source.getFilePointer(); final long originalPointer = source.getFilePointer();
source.seek(headerSize); source.seek(headerSize);
final int count = source.readUnsignedByte(); final int count = readCharGroupCount(source);
int groupOffset = 1; // 1 for the group count int groupOffset = getGroupCountSize(count);
final StringBuilder builder = new StringBuilder(); final StringBuilder builder = new StringBuilder();
String result = null; String result = null;
@ -1025,9 +1047,9 @@ public class BinaryDictInputOutput {
Map<Integer, Node> reverseNodeMap, Map<Integer, CharGroup> reverseGroupMap) Map<Integer, Node> reverseNodeMap, Map<Integer, CharGroup> reverseGroupMap)
throws IOException { throws IOException {
final int nodeOrigin = (int)(source.getFilePointer() - headerSize); final int nodeOrigin = (int)(source.getFilePointer() - headerSize);
final int count = source.readUnsignedByte(); final int count = readCharGroupCount(source);
final ArrayList<CharGroup> nodeContents = new ArrayList<CharGroup>(); final ArrayList<CharGroup> nodeContents = new ArrayList<CharGroup>();
int groupOffset = nodeOrigin + 1; // 1 byte for the group count int groupOffset = nodeOrigin + getGroupCountSize(count);
for (int i = count; i > 0; --i) { for (int i = count; i > 0; --i) {
CharGroupInfo info = readCharGroup(source, groupOffset); CharGroupInfo info = readCharGroup(source, groupOffset);
ArrayList<WeightedString> shortcutTargets = null; ArrayList<WeightedString> shortcutTargets = null;