📄 converttoarff.java
字号:
public void addNominalAttribute(String name, String xpath) { addAttribute(new NominalAttribute(name, xpath)); } public void addClassificationAttribute() { addAttribute(new ClassificationAttribute()); } public void addNumericAttribute(String name, String xpath) { addAttribute(new NumericAttribute(name, xpath)); } public void addPriorityAttribute() { addAttribute(new PriorityAttribute()); } public void addIdAttribute() { addAttribute(new IdAttribute()); } public void addAppNameAttribute() { addAttribute(new AppNameAttribute()); } /** * Convert a single Document to ARFF format. * * @param relationName the relation name * @param document the Document * @param appName the application name * @param out Writer to write the ARFF output to */ public void convert(String relationName, Document document, String appName, final Writer out) throws IOException, MissingNodeException { scan(document, appName); generateHeader(relationName, out); generateInstances(document, appName, out); } /** * Scan a Document to find out the ranges of attributes. * All Documents must be scanned before generating the ARFF * header and instances. * * @param document the Document * @param appName the application name */ public void scan(Document document, final String appName) throws MissingNodeException, IOException { List bugInstanceList = getBugInstanceList(document); for (Iterator i = bugInstanceList.iterator(); i.hasNext(); ) { final Element element = (Element) i.next(); scanAttributeList(new AttributeCallback() { public void apply(Attribute attribute) throws MissingNodeException { attribute.scan(element, appName); } }); } } /** * Generate ARFF header. * Documents must have already been scanned. * * @param relationName the relation name * @param out Writer to write the ARFF output to */ public void generateHeader(String relationName, final Writer out) throws MissingNodeException, IOException { out.write("@relation "); out.write(relationName); out.write("\n\n"); scanAttributeList(new AttributeCallback() { public void apply(Attribute attribute) throws IOException { out.write("@attribute "); out.write(attribute.getName()); out.write(" "); out.write(attribute.getRange()); out.write("\n"); } }); out.write("\n"); out.write("@data\n"); } /** * Generate instances from given Document. * Document should already have been scanned, and the ARFF header generated. * * @param document the Document * @param appName the application name * @param out Writer to write the ARFF output to */ public void generateInstances(Document document, final String appName, final Writer out) throws MissingNodeException, IOException { List bugInstanceList = getBugInstanceList(document); for (Iterator i = bugInstanceList.iterator(); i.hasNext(); ) { final Element element = (Element) i.next(); scanAttributeList(new AttributeCallback() { boolean first = true; public void apply(Attribute attribute) throws IOException { if (!first) out.write(","); first = false; String value; try { value = attribute.getInstanceValue(element, appName); } catch (MissingNodeException e) { value = "?"; } out.write(value); } }); out.write("\n"); } } /** * Apply a callback to all Attributes. * * @param callback the callback */ public void scanAttributeList(AttributeCallback callback) throws MissingNodeException, IOException { for (Iterator<Attribute> i = attributeList.iterator(); i.hasNext();) { Attribute attribute = i.next(); callback.apply(attribute); } } public void addDefaultAttributes() { // This conversion scheme is arbitrary. // FIXME: method and field signatures? addIdAttribute(); addNominalAttribute("bugtype", "@type"); addNominalAttribute("class", "./Class[1]/@classname"); addNominalAttribute("methodname", "./Method[1]/@name"); addNominalAttribute("auxmethodclass", "./Method[2]/@classname"); addNominalAttribute("auxmethodname", "./Method[2]/@name"); addNominalAttribute("fieldclass", "./Field[1]/@classname"); addNominalAttribute("fieldname", "./Field[1]/@name"); //addNumericAttribute("priority", "@priority"); addPriorityAttribute(); addClassificationAttribute(); } // ------------------------------------------------------------ // Implementation // ------------------------------------------------------------ private static int getBugClassification(String annotationText) { StringTokenizer tok = new StringTokenizer(annotationText, " \t\r\n\f.,:;-"); int state = UNCLASSIFIED; while (tok.hasMoreTokens()) { String s = tok.nextToken(); if (s.equals("BUG")) state |= BUG; else if (s.equals("NOT_BUG")) state |= NOT_BUG; else if (s.equals("HARMLESS")) state |= HARMLESS; } if ((state & NOT_BUG) != 0) return NOT_BUG; else if ((state & BUG) != 0) return ((state & HARMLESS) != 0) ? HARMLESS_BUG : BUG; else return UNCLASSIFIED; } private List getBugInstanceList(Document document) { List bugInstanceList = document.selectNodes("/BugCollection/BugInstance"); if (dropUnclassifiedWarnings) { for (Iterator i = bugInstanceList.iterator(); i.hasNext(); ) { Element element = (Element) i.next(); String annotationText = element.valueOf("./UserAnnotation[text()]"); int classification = getBugClassification(annotationText); if (classification == UNCLASSIFIED) i.remove(); } } return bugInstanceList; } private static class C2ACommandLine extends CommandLine { private ConvertToARFF converter = new ConvertToARFF(); public C2ACommandLine() { addSwitch("-train", "drop unclassified warnings"); addSwitch("-id", "add unique id attribute"); addSwitch("-app", "add application name attribute"); addSwitch("-default", "add default attributes"); addOption("-nominal", "attrName,xpath", "add a nominal attribute"); addOption("-numeric", "attrName,xpath", "add a numeric attribute"); addSwitch("-classification", "add bug classification attribute"); addSwitch("-priority", "add priority attribute"); } public ConvertToARFF getConverter() { return converter; } protected void handleOption(String option, String optionExtraPart) throws IOException { if (option.equals("-train")) { converter.dropUnclassifiedWarnings(); } else if (option.equals("-id")) { converter.addIdAttribute(); } else if (option.equals("-app")) { converter.addAppNameAttribute(); } else if (option.equals("-default")) { converter.addDefaultAttributes(); } else if (option.equals("-classification")) { converter.addClassificationAttribute(); } else if (option.equals("-priority")) { converter.addPriorityAttribute(); } } protected void handleOptionWithArgument(String option, String argument) throws IOException { if (option.equals("-nominal") || option.equals("-numeric")) { int comma = argument.indexOf(','); if (comma < 0) { throw new IllegalArgumentException("Missing comma separating attribute name and xpath in " + option + " option: " + argument); } String attrName = argument.substring(0, comma); String xpath = argument.substring(comma + 1); converter.addAttribute(option.equals("-nominal") ? new NominalAttribute(attrName, xpath) : new NumericAttribute(attrName, xpath)); } } public void printUsage(PrintStream out) { out.println("Usage: " + ConvertToARFF.class.getName() + " [options] <relation name> <output file> <findbugs results> [<findbugs results>...]"); super.printUsage(out); } } public static String toAppName(String fileName) { // Remove file extension, if any int lastDot = fileName.lastIndexOf('.'); if (lastDot >= 0) fileName = fileName.substring(0, lastDot); return fileName; } public static void main(String[] argv) throws Exception { // Expand any option files argv = CommandLine.expandOptionFiles(argv, true, true); // Parse command line arguments C2ACommandLine commandLine = new C2ACommandLine(); int argCount = commandLine.parse(argv); if (argCount > argv.length - 3) { commandLine.printUsage(System.err); System.exit(1); } String relationName = argv[argCount++]; String outputFileName = argv[argCount++]; // Create the converter ConvertToARFF converter = commandLine.getConverter(); // Open output file Writer out = new OutputStreamWriter(new BufferedOutputStream( new FileOutputStream(outputFileName))); // Read documents, // scan documents to find ranges of attributes List<DataFile> dataFileList = new ArrayList<DataFile>(); while (argCount < argv.length) { String fileName = argv[argCount++]; // Read input file as dom4j tree SAXReader reader = new SAXReader(); Document document = reader.read(fileName); DataFile dataFile = new DataFile(document, toAppName(fileName)); dataFileList.add(dataFile); converter.scan(dataFile.getDocument(), dataFile.getAppName()); } // Generate ARFF header converter.generateHeader(relationName, out); // Generate instances from each document for (Iterator<DataFile> i = dataFileList.iterator(); i.hasNext(); ) { DataFile dataFile = i.next(); converter.generateInstances(dataFile.getDocument(), dataFile.getAppName(), out); } out.close(); }}// vim:ts=4
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -