am 4c34454a: Make bigram handler a son of a more generic class (B1)
* commit '4c34454a70bc75633e1c76478f86f4bcce0b64d4': Make bigram handler a son of a more generic class (B1)
main
commit
394e7f13dd
|
@ -113,15 +113,12 @@ public class XmlDictInputOutput {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
static private class AssociativeListHandler extends DefaultHandler {
|
||||||
* SAX handler for a bigram XML file.
|
private final String SRC_TAG;
|
||||||
*/
|
private final String SRC_ATTRIBUTE;
|
||||||
static private class BigramHandler extends DefaultHandler {
|
private final String DST_TAG;
|
||||||
private final static String BIGRAM_W1_TAG = "bi";
|
private final String DST_ATTRIBUTE;
|
||||||
private final static String BIGRAM_W2_TAG = "w";
|
private final String DST_FREQ;
|
||||||
private final static String BIGRAM_W1_ATTRIBUTE = "w1";
|
|
||||||
private final static String BIGRAM_W2_ATTRIBUTE = "w2";
|
|
||||||
private final static String BIGRAM_FREQ_ATTRIBUTE = "p";
|
|
||||||
|
|
||||||
// In this version of the XML file, the bigram frequency is given as an int 0..XML_MAX
|
// In this version of the XML file, the bigram frequency is given as an int 0..XML_MAX
|
||||||
private final static int XML_MAX = 256;
|
private final static int XML_MAX = 256;
|
||||||
|
@ -129,31 +126,57 @@ public class XmlDictInputOutput {
|
||||||
private final static int MEMORY_MAX = 16;
|
private final static int MEMORY_MAX = 16;
|
||||||
private final static int XML_TO_MEMORY_RATIO = XML_MAX / MEMORY_MAX;
|
private final static int XML_TO_MEMORY_RATIO = XML_MAX / MEMORY_MAX;
|
||||||
|
|
||||||
String mW1;
|
private String mSrc;
|
||||||
final HashMap<String, ArrayList<WeightedString>> mBigramsMap;
|
private final HashMap<String, ArrayList<WeightedString>> mAssocMap;
|
||||||
|
|
||||||
public BigramHandler() {
|
public AssociativeListHandler(final String srcTag, final String srcAttribute,
|
||||||
mW1 = null;
|
final String dstTag, final String dstAttribute, final String dstFreq) {
|
||||||
mBigramsMap = new HashMap<String, ArrayList<WeightedString>>();
|
SRC_TAG = srcTag;
|
||||||
|
SRC_ATTRIBUTE = srcAttribute;
|
||||||
|
DST_TAG = dstTag;
|
||||||
|
DST_ATTRIBUTE = dstAttribute;
|
||||||
|
DST_FREQ = dstFreq;
|
||||||
|
mSrc = null;
|
||||||
|
mAssocMap = new HashMap<String, ArrayList<WeightedString>>();
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void startElement(String uri, String localName, String qName, Attributes attrs) {
|
public void startElement(String uri, String localName, String qName, Attributes attrs) {
|
||||||
if (BIGRAM_W1_TAG.equals(localName)) {
|
if (SRC_TAG.equals(localName)) {
|
||||||
mW1 = attrs.getValue(uri, BIGRAM_W1_ATTRIBUTE);
|
mSrc = attrs.getValue(uri, SRC_ATTRIBUTE);
|
||||||
} else if (BIGRAM_W2_TAG.equals(localName)) {
|
} else if (DST_TAG.equals(localName)) {
|
||||||
String w2 = attrs.getValue(uri, BIGRAM_W2_ATTRIBUTE);
|
String dst = attrs.getValue(uri, DST_ATTRIBUTE);
|
||||||
int freq = Integer.parseInt(attrs.getValue(uri, BIGRAM_FREQ_ATTRIBUTE));
|
int freq = Integer.parseInt(attrs.getValue(uri, DST_FREQ));
|
||||||
WeightedString bigram = new WeightedString(w2, freq / XML_TO_MEMORY_RATIO);
|
WeightedString bigram = new WeightedString(dst, freq / XML_TO_MEMORY_RATIO);
|
||||||
ArrayList<WeightedString> bigramList = mBigramsMap.get(mW1);
|
ArrayList<WeightedString> bigramList = mAssocMap.get(mSrc);
|
||||||
if (null == bigramList) bigramList = new ArrayList<WeightedString>();
|
if (null == bigramList) bigramList = new ArrayList<WeightedString>();
|
||||||
bigramList.add(bigram);
|
bigramList.add(bigram);
|
||||||
mBigramsMap.put(mW1, bigramList);
|
mAssocMap.put(mSrc, bigramList);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public HashMap<String, ArrayList<WeightedString>> getAssocMap() {
|
||||||
|
return mAssocMap;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/**
|
||||||
|
* SAX handler for a bigram XML file.
|
||||||
|
*/
|
||||||
|
static private class BigramHandler extends AssociativeListHandler {
|
||||||
|
private final static String BIGRAM_W1_TAG = "bi";
|
||||||
|
private final static String BIGRAM_W2_TAG = "w";
|
||||||
|
private final static String BIGRAM_W1_ATTRIBUTE = "w1";
|
||||||
|
private final static String BIGRAM_W2_ATTRIBUTE = "w2";
|
||||||
|
private final static String BIGRAM_FREQ_ATTRIBUTE = "p";
|
||||||
|
|
||||||
|
public BigramHandler() {
|
||||||
|
super(BIGRAM_W1_TAG, BIGRAM_W1_ATTRIBUTE, BIGRAM_W2_TAG, BIGRAM_W2_ATTRIBUTE,
|
||||||
|
BIGRAM_FREQ_ATTRIBUTE);
|
||||||
|
}
|
||||||
|
|
||||||
public HashMap<String, ArrayList<WeightedString>> getBigramMap() {
|
public HashMap<String, ArrayList<WeightedString>> getBigramMap() {
|
||||||
return mBigramsMap;
|
return getAssocMap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue