Merge "Fix a bug where ampersands in XML would not work"
This commit is contained in:
commit
55072fefe6
1 changed file with 11 additions and 3 deletions
|
@@ -60,6 +60,7 @@ public class XmlDictInputOutput {
|
||||||
final FusionDictionary mDictionary;
|
final FusionDictionary mDictionary;
|
||||||
int mState; // the state of the parser
|
int mState; // the state of the parser
|
||||||
int mFreq; // the currently read freq
|
int mFreq; // the currently read freq
|
||||||
|
String mWord; // the current word
|
||||||
final HashMap<String, ArrayList<WeightedString>> mBigramsMap;
|
final HashMap<String, ArrayList<WeightedString>> mBigramsMap;
|
||||||
|
|
||||||
/**
|
/**
|
||||||
|
@@ -72,6 +73,7 @@ public class XmlDictInputOutput {
|
||||||
HashMap<String, ArrayList<WeightedString>> bigrams) {
|
HashMap<String, ArrayList<WeightedString>> bigrams) {
|
||||||
mDictionary = dict;
|
mDictionary = dict;
|
||||||
mBigramsMap = bigrams;
|
mBigramsMap = bigrams;
|
||||||
|
mWord = "";
|
||||||
mState = START;
|
mState = START;
|
||||||
mFreq = 0;
|
mFreq = 0;
|
||||||
}
|
}
|
||||||
|
@@ -80,6 +82,7 @@ public class XmlDictInputOutput {
|
||||||
public void startElement(String uri, String localName, String qName, Attributes attrs) {
|
public void startElement(String uri, String localName, String qName, Attributes attrs) {
|
||||||
if (WORD_TAG.equals(localName)) {
|
if (WORD_TAG.equals(localName)) {
|
||||||
mState = WORD;
|
mState = WORD;
|
||||||
|
mWord = "";
|
||||||
for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
|
for (int attrIndex = 0; attrIndex < attrs.getLength(); ++attrIndex) {
|
||||||
final String attrName = attrs.getLocalName(attrIndex);
|
final String attrName = attrs.getLocalName(attrIndex);
|
||||||
if (FREQUENCY_ATTR.equals(attrName)) {
|
if (FREQUENCY_ATTR.equals(attrName)) {
|
||||||
|
@@ -94,14 +97,19 @@ public class XmlDictInputOutput {
|
||||||
@Override
|
@Override
|
||||||
public void characters(char[] ch, int start, int length) {
|
public void characters(char[] ch, int start, int length) {
|
||||||
if (WORD == mState) {
|
if (WORD == mState) {
|
||||||
final String word = String.copyValueOf(ch, start, length);
|
// The XML parser is free to return text in arbitrary chunks one after the
|
||||||
mDictionary.add(word, mFreq, mBigramsMap.get(word));
|
// other. In particular, this happens in some implementations when it finds
|
||||||
|
// an escape code like "&amp;".
|
||||||
|
mWord += String.copyValueOf(ch, start, length);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void endElement(String uri, String localName, String qName) {
|
public void endElement(String uri, String localName, String qName) {
|
||||||
if (WORD == mState) mState = START;
|
if (WORD == mState) {
|
||||||
|
mDictionary.add(mWord, mFreq, mBigramsMap.get(mWord));
|
||||||
|
mState = START;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue