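Fix a bug where words containing ampersands in the XML were not read correctly: the parser may deliver an element's text in several chunks (for example around an escape code like "&amp;"), so the word is now accumulated across chunks and added to the dictionary when the element ends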

Change-Id: Icc1bce28b78c4da2d529c308f251e298ea94539f
main
Jean Chalard 2011-09-28 16:21:23 +09:00
parent a59f65ad0c
commit e12e29ca7f
1 changed file with 11 additions and 3 deletions

View File

@ -60,6 +60,7 @@ public class XmlDictInputOutput {
final FusionDictionary mDictionary; final FusionDictionary mDictionary;
int mState; // the state of the parser int mState; // the state of the parser
int mFreq; // the currently read freq int mFreq; // the currently read freq
String mWord; // the current word
final HashMap<String, ArrayList<WeightedString>> mBigramsMap; final HashMap<String, ArrayList<WeightedString>> mBigramsMap;
/** /**
@ -72,6 +73,7 @@ public class XmlDictInputOutput {
HashMap<String, ArrayList<WeightedString>> bigrams) { HashMap<String, ArrayList<WeightedString>> bigrams) {
mDictionary = dict; mDictionary = dict;
mBigramsMap = bigrams; mBigramsMap = bigrams;
mWord = "";
mState = START; mState = START;
mFreq = 0; mFreq = 0;
} }
@ -80,6 +82,7 @@ public class XmlDictInputOutput {
public void startElement(String uri, String localName, String qName, Attributes attrs) { public void startElement(String uri, String localName, String qName, Attributes attrs) {
if (WORD_TAG.equals(localName)) { if (WORD_TAG.equals(localName)) {
mState = WORD; mState = WORD;
mWord = "";
for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) { for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
final String attrName = attrs.getLocalName(attrIndex); final String attrName = attrs.getLocalName(attrIndex);
if (FREQUENCY_ATTR.equals(attrName)) { if (FREQUENCY_ATTR.equals(attrName)) {
@ -94,14 +97,19 @@ public class XmlDictInputOutput {
@Override @Override
public void characters(char[] ch, int start, int length) { public void characters(char[] ch, int start, int length) {
if (WORD == mState) { if (WORD == mState) {
final String word = String.copyValueOf(ch, start, length); // The XML parser is free to return text in arbitrary chunks one after the
mDictionary.add(word, mFreq, mBigramsMap.get(word)); // other. In particular, this happens in some implementations when it finds
// an escape code like "&amp;".
mWord += String.copyValueOf(ch, start, length);
} }
} }
@Override @Override
public void endElement(String uri, String localName, String qName) { public void endElement(String uri, String localName, String qName) {
if (WORD == mState) mState = START; if (WORD == mState) {
mDictionary.add(mWord, mFreq, mBigramsMap.get(mWord));
mState = START;
}
} }
} }