📄 converttoarff.java
字号:
/* * Machine Learning support for FindBugs * Copyright (C) 2004, University of Maryland * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with this library; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package edu.umd.cs.findbugs.ml;import edu.umd.cs.findbugs.CommandLine;import java.io.BufferedOutputStream;import java.io.FileOutputStream;import java.io.IOException;import java.io.OutputStreamWriter;import java.io.PrintStream;import java.io.Writer;import java.util.ArrayList;import java.util.Collection;import java.util.Iterator;import java.util.LinkedList;import java.util.List;import java.util.Set;import java.util.StringTokenizer;import java.util.TreeSet;import org.dom4j.Document;import org.dom4j.Element;import org.dom4j.Node;import org.dom4j.io.SAXReader;/** * Convert a BugCollection into ARFF format. * See Witten and Frank, <em>Data Mining</em>, ISBN 1-55860-552-5. * * @see BugCollection * @see BugInstance * @author David Hovemeyer */public class ConvertToARFF { // ------------------------------------------------------------ // Helper classes // ------------------------------------------------------------ private static class DataFile { private Document document; private String appName; public DataFile(Document document, String appName) { this.document = document; this.appName = appName; } public Document getDocument() { return document; } public String getAppName() { return appName; } } private static class MissingNodeException extends Exception { private static final long serialVersionUID = -5042140832791541208L; public MissingNodeException(String msg) { super(msg); } } public interface Attribute { public String getName(); public void scan(Element element, String appName) throws MissingNodeException; public String getRange(); public String getInstanceValue(Element element, String appName) throws MissingNodeException; } private abstract static class XPathAttribute implements Attribute { private String name; private String xpath; public XPathAttribute(String name, String xpath) { this.name = name; this.xpath = xpath; } public String getName() { return name; } public String getInstanceValue(Element element, String appName) throws MissingNodeException { Object value = element.selectObject(xpath); if (value == null) throw new MissingNodeException("Could not get value from element (path=" + xpath + ")"); if (value instanceof List) { List list = (List) value; value = list.get(0); } if (value instanceof Node) { Node node = (Node) value; return node.getText(); } else if (value instanceof String) { return (String) value; } else if (value instanceof Number) { String s = value.toString(); if (s.endsWith(".0")) s = s.substring(0, s.length() - 2); return s; } else throw new IllegalStateException("Unexpected object returned from xpath query: " + value); } } public static class NominalAttribute extends XPathAttribute { private Set<String> possibleValueSet; public NominalAttribute(String name, String xpath) { super(name, xpath); this.possibleValueSet = new TreeSet<String>(); } public void scan(Element element, String appName) { try { possibleValueSet.add(getInstanceValue(element, appName)); } catch (MissingNodeException ignore) { // Ignore: we'll just use an n/a value for this instance } } public String getRange() { return collectionToRange(possibleValueSet); } public String getInstanceValue(Element element, String appName) throws MissingNodeException { return "\"" + super.getInstanceValue(element, appName) + "\""; } } private static final int UNCLASSIFIED = 0; private static final int BUG = 1; private static final int NOT_BUG = 2; private static final int HARMLESS = 4; private static final int HARMLESS_BUG = HARMLESS | BUG; public static class ClassificationAttribute implements Attribute { public String getName() { return "classification"; } public void scan(Element element, String appName) throws MissingNodeException { } public String getRange() { return "{bug,not_bug,harmless_bug}"; } public String getInstanceValue(Element element, String appName) throws MissingNodeException { String annotationText = element.valueOf("./UserAnnotation[text()]"); //System.out.println("annotationText=" + annotationText); int state = getBugClassification(annotationText); if (state == NOT_BUG) return "not_bug"; else if (state == BUG) return "bug"; else if (state == HARMLESS_BUG) return "harmless_bug"; else throw new MissingNodeException("Unclassified warning"); } } public static class NumericAttribute extends XPathAttribute { public NumericAttribute(String name, String xpath) { super(name, xpath); } public void scan(Element element, String appName) throws MissingNodeException { } public String getRange() { return "numeric"; } } public static class PriorityAttribute implements Attribute { public String getName() { return "priority"; } public void scan(Element element, String appName) throws MissingNodeException { } public String getRange() { return "{low,medium,high}"; } public String getInstanceValue(Element element, String appName) throws MissingNodeException { org.dom4j.Attribute attribute = element.attribute("priority"); if (attribute == null) throw new MissingNodeException("Missing priority attribute"); String value = attribute.getValue(); try { int prio = Integer.parseInt(value); switch (prio) { case 1: return "high"; case 2: return "medium"; case 3: return "low"; default: return "?"; } } catch (NumberFormatException e) { throw new MissingNodeException("Invalid priority value: " + value); } } } /** * An attribute that just gives each instance a unique id. * Obviously, this attribute shouldn't be used as input * to a learning algorithm. */ public static class IdAttribute implements Attribute { private int count = 0; public String getName() { return "id"; } public void scan(Element element, String appName) throws MissingNodeException { } public String getRange() { return "numeric"; } public String getInstanceValue(Element element, String appName) throws MissingNodeException { return String.valueOf(count++); } } public static class AppNameAttribute implements Attribute { private Set<String> appNameSet = new TreeSet<String>(); public String getName() { return "appname"; } public void scan(Element element, String appName) throws MissingNodeException { appNameSet.add(appName); } public String getRange() { return collectionToRange(appNameSet); } public String getInstanceValue(Element element, String appName) throws MissingNodeException { return "\"" + appName + "\""; } } public static String collectionToRange(Collection<String> collection) { StringBuffer buf = new StringBuffer(); buf.append("{"); for (Iterator<String> i = collection.iterator(); i.hasNext();) { if (buf.length() > 1) buf.append(','); buf.append(i.next()); } buf.append("}"); return buf.toString(); } public interface AttributeCallback { public void apply(Attribute attribute) throws MissingNodeException, IOException; } // ------------------------------------------------------------ // Fields // ------------------------------------------------------------ private List<Attribute> attributeList; private boolean dropUnclassifiedWarnings; // ------------------------------------------------------------ // Public methods // ------------------------------------------------------------ public ConvertToARFF() { this.attributeList = new LinkedList<Attribute>(); this.dropUnclassifiedWarnings = false; } public void dropUnclassifiedWarnings() { this.dropUnclassifiedWarnings = true; } public void addAttribute(Attribute attribute) { attributeList.add(attribute); }
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -