📄 converttoarff.java
字号:
/* * Machine Learning support for FindBugs * Copyright (C) 2004,2005 University of Maryland * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package edu.umd.cs.findbugs.ml;import java.io.BufferedOutputStream;import java.io.FileOutputStream;import java.io.IOException;import java.io.OutputStreamWriter;import java.io.PrintStream;import java.io.Writer;import java.util.ArrayList;import java.util.Collection;import java.util.IdentityHashMap;import java.util.Iterator;import java.util.LinkedList;import java.util.List;import java.util.Random;import java.util.Set;import java.util.StringTokenizer;import java.util.TreeSet;import org.dom4j.Document;import org.dom4j.Element;import org.dom4j.Node;import org.dom4j.io.SAXReader;import edu.umd.cs.findbugs.BugCollection;import edu.umd.cs.findbugs.BugInstance;import edu.umd.cs.findbugs.config.CommandLine;/** * Convert a BugCollection into ARFF format. * See Witten and Frank, <em>Data Mining</em>, ISBN 1-55860-552-5. 
* * @see BugCollection * @see BugInstance * @author David Hovemeyer */public class ConvertToARFF { // ------------------------------------------------------------ // Helper classes // ------------------------------------------------------------ private static class DataFile { private Document document; private String appName; public DataFile(Document document, String appName) { this.document = document; this.appName = appName; } public Document getDocument() { return document; } public String getAppName() { return appName; } } private static class MissingNodeException extends Exception { private static final long serialVersionUID = -5042140832791541208L; public MissingNodeException(String msg) { super(msg); } } public interface Attribute { public String getName(); public void scan(Element element, String appName) throws MissingNodeException; public String getRange(); public String getInstanceValue(Element element, String appName) throws MissingNodeException; } private abstract static class XPathAttribute implements Attribute { private String name; private String xpath; public XPathAttribute(String name, String xpath) { this.name = name; this.xpath = xpath; } public String getName() { return name; } public String getInstanceValue(Element element, String appName) throws MissingNodeException { Object value = element.selectObject(xpath); if (value == null) throw new MissingNodeException("Could not get value from element (path=" + xpath + ")"); if (value instanceof List) { List list = (List) value; if (list.size() == 0) throw new MissingNodeException("Could not get value from element (path=" + xpath + ")"); value = list.get(0); } if (value instanceof Node) { Node node = (Node) value; return node.getText(); } else if (value instanceof String) { return (String) value; } else if (value instanceof Number) { String s = value.toString(); if (s.endsWith(".0")) s = s.substring(0, s.length() - 2); return s; } else throw new IllegalStateException("Unexpected object returned from 
xpath query: " + value); } } public static class NominalAttribute extends XPathAttribute { private Set<String> possibleValueSet; public NominalAttribute(String name, String xpath) { super(name, xpath); this.possibleValueSet = new TreeSet<String>(); } public void scan(Element element, String appName) { try { possibleValueSet.add(getInstanceValue(element, appName)); } catch (MissingNodeException ignore) { // Ignore: we'll just use an n/a value for this instance } } public String getRange() { return collectionToRange(possibleValueSet); } @Override public String getInstanceValue(Element element, String appName) throws MissingNodeException { return "\"" + super.getInstanceValue(element, appName) + "\""; } } public static class BooleanAttribute extends XPathAttribute { public BooleanAttribute(String name, String xpath) { super(name, xpath); } public void scan(Element element, String appName) throws MissingNodeException { // Nothing to do. } public String getRange() { return "{true, false}"; } @Override public String getInstanceValue(Element element, String appName) throws MissingNodeException { try { String value = super.getInstanceValue(element, appName); return "\"" + Boolean.valueOf(value).toString() + "\""; } catch (MissingNodeException e) { return "\"false\""; } } } private static final int UNCLASSIFIED = 0; private static final int BUG = 1; private static final int NOT_BUG = 2; private static final int HARMLESS = 4; private static final int HARMLESS_BUG = HARMLESS | BUG; public static abstract class AbstractClassificationAttribute implements Attribute { /* (non-Javadoc) * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getName() */ public String getName() { return "classification"; } /* (non-Javadoc) * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#scan(org.dom4j.Element, java.lang.String) */ public void scan(Element element, String appName) throws MissingNodeException { } /* (non-Javadoc) * @see 
edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getInstanceValue(org.dom4j.Element, java.lang.String) */ public String getInstanceValue(Element element, String appName) throws MissingNodeException { String annotationText = element.valueOf("./UserAnnotation[text()]"); //System.out.println("annotationText=" + annotationText); int state = getBugClassification(annotationText); return bugToString(state); } protected abstract String bugToString(int bugType) throws MissingNodeException; } public static class ClassificationAttribute extends AbstractClassificationAttribute { public String getRange() { return "{bug,not_bug,harmless_bug}"; } @Override protected String bugToString(int state) throws MissingNodeException { if (state == NOT_BUG) return "not_bug"; else if (state == BUG) return "bug"; else if (state == HARMLESS_BUG) return "harmless_bug"; else throw new MissingNodeException("Unclassified warning"); } } public static class BinaryClassificationAttribute extends AbstractClassificationAttribute { /* (non-Javadoc) * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getRange() */ public String getRange() { return "{bug, not_bug}"; } /* (non-Javadoc) * @see edu.umd.cs.findbugs.ml.ConvertToARFF.AbstractClassificationAttribute#bugToString(int) */ @Override protected String bugToString(int state) throws MissingNodeException { if (state == BUG) return "bug"; else if (state == NOT_BUG || state == HARMLESS_BUG) return "not_bug"; else throw new MissingNodeException("unclassified warning"); } } public static class NumericAttribute extends XPathAttribute { public NumericAttribute(String name, String xpath) { super(name, xpath); } public void scan(Element element, String appName) throws MissingNodeException { } public String getRange() { return "numeric"; } } public static class PriorityAttribute implements Attribute { public String getName() { return "priority"; } public void scan(Element element, String appName) throws MissingNodeException { } public String getRange() { return 
"{low,medium,high}"; } public String getInstanceValue(Element element, String appName) throws MissingNodeException { org.dom4j.Attribute attribute = element.attribute("priority"); if (attribute == null) throw new MissingNodeException("Missing priority attribute"); String value = attribute.getValue(); try { int prio = Integer.parseInt(value); switch (prio) { case 1: return "high"; case 2: return "medium"; case 3: return "low"; default: return "?"; } } catch (NumberFormatException e) { throw new MissingNodeException("Invalid priority value: " + value); } } } /** * An attribute that just gives each instance a unique id. * The application name is prepended, so each unique id * really unique, even across applications. * Obviously, this attribute shouldn't be used as input * to a learning algorithm. * * <p>Uses the Element's uid attribute if it has one.</p> */ public static class IdAttribute implements Attribute { private TreeSet<String> possibleValueSet = new TreeSet<String>(); private boolean scanning = true; private int count = 0; public String getName() { return "id"; } public void scan(Element element, String appName) throws MissingNodeException { possibleValueSet.add(instanceValue(element, appName)); } public String getRange() { return collectionToRange(possibleValueSet); } public String getInstanceValue(Element element, String appName) throws MissingNodeException { if (scanning) { count = 0; scanning = false; } return instanceValue(element, appName); } private String instanceValue(Element element, String appName) { String nextId; org.dom4j.Attribute uidAttr= element.attribute("uid"); if (uidAttr != null) { nextId = uidAttr.getValue(); } else { nextId = String.valueOf(count++); } return "\"" + appName + "-" + nextId + "\""; } } public static class IdStringAttribute implements Attribute { /* (non-Javadoc) * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getName() */ public String getName() { return "ids"; } /* (non-Javadoc) * @see 
edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#scan(org.dom4j.Element, java.lang.String)
         */
        public void scan(Element element, String appName) throws MissingNodeException {
        }

        /* (non-Javadoc)
         * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getRange()
         */
        public String getRange() {
            return "string";
        }

        // Next sequence number for elements without a uid attribute.
        int count = 0;

        /* (non-Javadoc)
         * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getInstanceValue(org.dom4j.Element, java.lang.String)
         */
        public String getInstanceValue(Element element, String appName) throws MissingNodeException {
            // Quoted "appName-uid"; the running counter stands in for a
            // missing uid attribute.
            String value;
            org.dom4j.Attribute uidAttr = element.attribute("uid");
            if (uidAttr == null) {
                value = String.valueOf(count++);
            } else {
                value = uidAttr.getStringValue();
            }
            return "\"" + appName + "-" + value + "\"";
        }
    }

    // Alphabet from which random ids are drawn.
    private static final String RANDOM_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";

    /**
     * Attribute that assigns each scanned element a random 20-letter id.
     * Ids are remembered per element by object identity.
     */
    public static class RandomIdAttribute implements Attribute {
        private Random rng = new Random();
        private IdentityHashMap<Element, String> idMap = new IdentityHashMap<Element, String>();

        /* (non-Javadoc)
         * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getName()
         */
        public String getName() {
            return "idr";
        }

        /* (non-Javadoc)
         * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#scan(org.dom4j.Element, java.lang.String)
         */
        public void scan(Element element, String appName) throws MissingNodeException {
            idMap.put(element, generateId());
        }

        /**
         * @return a string of 20 characters, each chosen at random
         *         from RANDOM_CHARS
         */
        private String generateId() {
            // Purely local buffer, so the unsynchronized StringBuilder suffices.
            StringBuilder buf = new StringBuilder();
            for (int i = 0; i < 20; ++i) {
                char c = RANDOM_CHARS.charAt(rng.nextInt(RANDOM_CHARS.length()));
                buf.append(c);
            }
            return buf.toString();
        }

        /* (non-Javadoc)
         * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getRange()
         */
        public String getRange() {
            TreeSet<String> range = new TreeSet<String>();
            range.addAll(idMap.values());
            // Fewer distinct ids than elements means two elements got the same id.
            if (range.size() != idMap.size())
                throw new IllegalStateException("id collision!");
            return collectionToRange(range);
        }

        /* (non-Javadoc)
         * @see edu.umd.cs.findbugs.ml.ConvertToARFF.Attribute#getInstanceValue(org.dom4j.Element, java.lang.String)
         */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -