📄 converttoarff.java
字号:
// NOTE(review): the first method below is the tail of a nested attribute class whose
// declaration lies before this chunk; it is reproduced unchanged.
        public String getInstanceValue(Element element, String appName) throws MissingNodeException {
            String id = idMap.get(element);
            if (id == null)
                throw new IllegalStateException("Element not scanned?");
            return "\"" + id + "\"";
        }
    }

    /**
     * Attribute recording the application name passed in for each instance.
     * Its range is the sorted set of application names seen while scanning.
     */
    public static class AppNameAttribute implements Attribute {
        // TreeSet keeps the names unique and in sorted order for the header.
        private Set<String> appNameSet = new TreeSet<String>();

        public String getName() {
            return "appname";
        }

        public void scan(Element element, String appName) throws MissingNodeException {
            appNameSet.add(appName);
        }

        public String getRange() {
            // NOTE(review): names are listed unquoted here but quoted in
            // getInstanceValue(); confirm ARFF consumers treat both forms alike.
            return collectionToRange(appNameSet);
        }

        public String getInstanceValue(Element element, String appName) throws MissingNodeException {
            return "\"" + appName + "\"";
        }
    }

    /**
     * Format a collection of values as a brace-enclosed, comma-separated
     * range, e.g. <code>{a,b,c}</code>. Values are written verbatim in the
     * collection's iteration order; an empty collection yields <code>{}</code>.
     *
     * @param collection the values to list
     * @return the formatted range
     */
    public static String collectionToRange(Collection<String> collection) {
        StringBuilder buf = new StringBuilder();
        buf.append("{");
        // Track the first element explicitly: testing the buffer length would
        // skip the separator after an empty first value.
        boolean first = true;
        for (String value : collection) {
            if (!first)
                buf.append(',');
            first = false;
            buf.append(value);
        }
        buf.append("}");
        return buf.toString();
    }

    /**
     * Callback applied to each Attribute by scanAttributeList().
     */
    public interface AttributeCallback {
        public void apply(Attribute attribute) throws MissingNodeException, IOException;
    }

    // ------------------------------------------------------------
    // Constants
    // ------------------------------------------------------------

    private static final String DEFAULT_NODE_SELECTION_XPATH = "/BugCollection/BugInstance";

    // ------------------------------------------------------------
    // Fields
    // ------------------------------------------------------------

    private List<Attribute> attributeList;
    private String nodeSelectionXpath;
    private boolean dropUnclassifiedWarnings;
    private String appName;

    // ------------------------------------------------------------
    // Public methods
    // ------------------------------------------------------------

    /**
     * Create a converter with no attributes, the default node selection
     * xpath, and unclassified warnings retained.
     */
    public ConvertToARFF() {
        this.attributeList = new LinkedList<Attribute>();
        this.nodeSelectionXpath = DEFAULT_NODE_SELECTION_XPATH;
        this.dropUnclassifiedWarnings = false;
    }

    /**
     * Set a fixed application name; when set, toAppName() returns it
     * instead of deriving a name from the file name.
     *
     * @param appName the application name
     */
    public void setAppName(String appName) {
        this.appName = appName;
    }

    /**
     * Set the xpath expression used to select BugInstance nodes.
     *
     * @param nodeSelectionXpath the node selection xpath expression
     */
    public void setNodeSelectionXpath(String nodeSelectionXpath) {
        this.nodeSelectionXpath = nodeSelectionXpath;
    }

    /**
     * @return the number of attributes added so far
     */
    public int getNumAttributes() {
        return attributeList.size();
    }

    /**
     * Request that selected nodes whose user annotation classifies as
     * UNCLASSIFIED be dropped before scanning and instance generation.
     */
    public void dropUnclassifiedWarnings() {
        this.dropUnclassifiedWarnings = true;
    }

    /**
     * Append an attribute; attributes are emitted in the order added.
     *
     * @param attribute the Attribute to add
     */
    public void addAttribute(Attribute attribute) {
        attributeList.add(attribute);
    }

    public void addNominalAttribute(String name, String xpath) {
        addAttribute(new NominalAttribute(name, xpath));
    }

    public void addBooleanAttribute(String name, String xpath) {
        addAttribute(new BooleanAttribute(name, xpath));
    }

    public void addClassificationAttribute() {
        addAttribute(new ClassificationAttribute());
    }

    public void addNumericAttribute(String name, String xpath) {
        addAttribute(new NumericAttribute(name, xpath));
    }

    public void addPriorityAttribute() {
        addAttribute(new PriorityAttribute());
    }

    public void addIdAttribute() {
        addAttribute(new IdAttribute());
    }

    public void addAppNameAttribute() {
        addAttribute(new AppNameAttribute());
    }

    /**
     * Convert a single Document to ARFF format.
     *
     * @param relationName the relation name
     * @param document     the Document
     * @param appName      the application name
     * @param out          Writer to write the ARFF output to
     */
    public void convert(String relationName, Document document, String appName, final Writer out)
            throws IOException, MissingNodeException {
        scan(document, appName);
        generateHeader(relationName, out);
        generateInstances(document, appName, out);
    }

    /**
     * Scan a Document to find out the ranges of attributes.
     * All Documents must be scanned before generating the ARFF
     * header and instances.
     *
     * @param document the Document
     * @param appName  the application name
     */
    public void scan(Document document, final String appName) throws MissingNodeException, IOException {
        List<Element> bugInstanceList = getBugInstanceList(document);

        for (final Element element : bugInstanceList) {
            scanAttributeList(new AttributeCallback() {
                public void apply(Attribute attribute) throws MissingNodeException {
                    attribute.scan(element, appName);
                }
            });
        }
    }

    /**
     * Generate ARFF header.
     * Documents must have already been scanned.
     *
     * @param relationName the relation name
     * @param out          Writer to write the ARFF output to
     */
    public void generateHeader(String relationName, final Writer out)
            throws MissingNodeException, IOException {
        out.write("@relation ");
        out.write(relationName);
        out.write("\n\n");

        // One "@attribute <name> <range>" line per attribute, in insertion order.
        scanAttributeList(new AttributeCallback() {
            public void apply(Attribute attribute) throws IOException {
                out.write("@attribute ");
                out.write(attribute.getName());
                out.write(" ");
                out.write(attribute.getRange());
                out.write("\n");
            }
        });
        out.write("\n");

        out.write("@data\n");
    }

    /**
     * Generate instances from given Document.
     * Document should already have been scanned, and the ARFF header generated.
     * Each selected node becomes one comma-separated line; an attribute whose
     * value cannot be found (MissingNodeException) is written as "?".
     *
     * @param document the Document
     * @param appName  the application name
     * @param out      Writer to write the ARFF output to
     */
    public void generateInstances(Document document, final String appName, final Writer out)
            throws MissingNodeException, IOException {
        List<Element> bugInstanceList = getBugInstanceList(document);

        for (final Element element : bugInstanceList) {
            scanAttributeList(new AttributeCallback() {
                // A fresh callback is created per element, so this resets per line.
                boolean first = true;

                public void apply(Attribute attribute) throws IOException {
                    if (!first)
                        out.write(",");
                    first = false;
                    String value;
                    try {
                        value = attribute.getInstanceValue(element, appName);
                    } catch (MissingNodeException e) {
                        value = "?";
                    }
                    out.write(value);
                }
            });
            out.write("\n");
        }
    }

    /**
     * Apply a callback to all Attributes.
     *
     * @param callback the callback
     */
    public void scanAttributeList(AttributeCallback callback)
            throws MissingNodeException, IOException {
        for (Attribute attribute : attributeList) {
            callback.apply(attribute);
        }
    }

    // ------------------------------------------------------------
    // Implementation
    // ------------------------------------------------------------

    /**
     * Classify user annotation text by the keywords it contains.
     * The text is split on whitespace and the characters <code>.,:;-</code>;
     * the tokens BUG, NOT_BUG and HARMLESS are recognized (case-sensitively).
     * NOT_BUG takes precedence; otherwise BUG yields HARMLESS_BUG when
     * HARMLESS is also present, else BUG. Anything else, including HARMLESS
     * alone, is UNCLASSIFIED.
     *
     * @param annotationText the user annotation text
     * @return one of NOT_BUG, HARMLESS_BUG, BUG or UNCLASSIFIED
     */
    private static int getBugClassification(String annotationText) {
        StringTokenizer tok = new StringTokenizer(annotationText, " \t\r\n\f.,:;-");

        // NOTE(review): the constants are combined as bit flags here; presumably
        // UNCLASSIFIED is 0 and the others are distinct bits -- confirm at their definitions.
        int state = UNCLASSIFIED;

        while (tok.hasMoreTokens()) {
            String s = tok.nextToken();
            if (s.equals("BUG"))
                state |= BUG;
            else if (s.equals("NOT_BUG"))
                state |= NOT_BUG;
            else if (s.equals("HARMLESS"))
                state |= HARMLESS;
        }

        if ((state & NOT_BUG) != 0)
            return NOT_BUG;
        else if ((state & BUG) != 0)
            return ((state & HARMLESS) != 0) ? HARMLESS_BUG : BUG;
        else
            return UNCLASSIFIED;
    }

    /**
     * Select the nodes matching the node selection xpath, removing those
     * classified as UNCLASSIFIED when dropUnclassifiedWarnings is set.
     *
     * @param document the Document
     * @return the selected elements
     */
    private List<Element> getBugInstanceList(Document document) {
        // Unchecked conversion from the list type returned by selectNodes;
        // the xpath is expected to select elements only.
        @SuppressWarnings("unchecked")
        List<Element> bugInstanceList = document.selectNodes(nodeSelectionXpath);
        if (dropUnclassifiedWarnings) {
            // Iterator.remove() allows deletion while iterating.
            for (Iterator<Element> i = bugInstanceList.iterator(); i.hasNext(); ) {
                Element element = i.next();
                String annotationText = element.valueOf("./UserAnnotation[text()]");
                int classification = getBugClassification(annotationText);
                if (classification == UNCLASSIFIED)
                    i.remove();
            }
        }
        return bugInstanceList;
    }

    /**
     * Command line handler that configures a ConvertToARFF instance from
     * the switches and options given on the command line.
     */
    private static class C2ACommandLine extends CommandLine {
        private ConvertToARFF converter = new ConvertToARFF();

        public C2ACommandLine() {
            addOption("-select", "xpath expression", "select BugInstance elements");
            addSwitch("-train", "drop unclassified warnings");
            addSwitch("-id", "add unique id attribute (as nominal)");
            addSwitch("-ids", "add unique id attribute (as string)");
            addSwitch("-idr", "add random unique id attribute (as nominal)");
            addSwitch("-app", "add application name attribute");
            addOption("-nominal", "attrName,xpath", "add a nominal attribute");
            addOption("-boolean", "attrName,xpath", "add a boolean attribute");
            addOption("-numeric", "attrName,xpath", "add a numeric attribute");
            addSwitch("-classification", "add bug classification attribute");
            addSwitch("-binclass", "add binary (bug/not_bug) classification attribute");
            addSwitch("-priority", "add priority attribute");
            addOption("-appname", "app name", "set application name of all tuples");
        }

        /**
         * @return the converter configured by the parsed options
         */
        public ConvertToARFF getConverter() {
            return converter;
        }

        @Override
        protected void handleOption(String option, String optionExtraPart)
                throws IOException {
            if (option.equals("-train")) {
                converter.dropUnclassifiedWarnings();
            } else if (option.equals("-id")) {
                converter.addIdAttribute();
            } else if (option.equals("-ids")) {
                converter.addAttribute(new IdStringAttribute());
            } else if (option.equals("-idr")) {
                converter.addAttribute(new RandomIdAttribute());
            } else if (option.equals("-app")) {
                converter.addAppNameAttribute();
            } else if (option.equals("-classification")) {
                converter.addClassificationAttribute();
            } else if (option.equals("-binclass")) {
                converter.addAttribute(new BinaryClassificationAttribute());
            } else if (option.equals("-priority")) {
                converter.addPriorityAttribute();
            }
        }

        /**
         * Factory for attributes defined by a name and an xpath expression.
         */
        private interface XPathAttributeCreator {
            public Attribute create(String name, String xpath);
        }

        @Override
        protected void handleOptionWithArgument(String option, String argument)
                throws IOException {
            if (option.equals("-select")) {
                converter.setNodeSelectionXpath(argument);
            } else if (option.equals("-nominal")) {
                addXPathAttribute(option, argument, new XPathAttributeCreator() {
                    public Attribute create(String name, String xpath) {
                        return new NominalAttribute(name, xpath);
                    }
                });
            } else if (option.equals("-boolean")) {
                addXPathAttribute(option, argument, new XPathAttributeCreator() {
                    public Attribute create(String name, String xpath) {
                        return new BooleanAttribute(name, xpath);
                    }
                });
            } else if (option.equals("-numeric")) {
                addXPathAttribute(option, argument, new XPathAttributeCreator() {
                    public Attribute create(String name, String xpath) {
                        return new NumericAttribute(name, xpath);
                    }
                });
            } else if (option.equals("-appname")) {
                converter.setAppName(argument);
            }
        }

        /**
         * Split an "attrName,xpath" argument at its first comma and add the
         * attribute built by the given creator to the converter.
         *
         * @param option   the option being handled (used in the error message)
         * @param argument the "attrName,xpath" argument
         * @param creator  factory producing the Attribute
         * @throws IllegalArgumentException if the argument contains no comma
         */
        protected void addXPathAttribute(String option, String argument, XPathAttributeCreator creator) {
            int comma = argument.indexOf(',');
            if (comma < 0) {
                throw new IllegalArgumentException("Missing comma separating attribute name and xpath in " +
                        option + " option: " + argument);
            }
            // Only the first comma separates; the xpath itself may contain commas.
            String attrName = argument.substring(0, comma);
            String xpath = argument.substring(comma + 1);
            converter.addAttribute(creator.create(attrName, xpath));
        }

        /**
         * Print the usage line followed by the option summary from the superclass.
         *
         * @param out stream to print to
         */
        public void printUsage(PrintStream out) {
            out.println("Usage: " + ConvertToARFF.class.getName() +
                    " [options] <relation name> <output file> <findbugs results> [<findbugs results>...]");
            super.printUsage(out);
        }
    }

    /**
     * Determine the application name to use for an input file.
     * Returns the name set via setAppName() if there is one; otherwise the
     * file name with its extension, if any, removed.
     *
     * @param fileName the input file name
     * @return the application name
     */
    public String toAppName(String fileName) {
        if (appName != null)
            return appName;

        // Remove file extension, if any. A dot only marks an extension when it
        // follows the last path separator; otherwise a dot in a directory
        // component (e.g. "./results/foo") would truncate the path.
        int lastSeparator = Math.max(fileName.lastIndexOf('/'), fileName.lastIndexOf('\\'));
        int lastDot = fileName.lastIndexOf('.');
        if (lastDot > lastSeparator)
            fileName = fileName.substring(0, lastDot);
        return fileName;
    }

    /**
     * Command line entry point: reads one or more input documents, scans
     * them all to determine attribute ranges, then writes a single ARFF
     * file containing the header and the instances of every document.
     *
     * @param argv [options] relation name, output file, one or more input files
     */
    public static void main(String[] argv) throws Exception {
        // Expand any option files
        argv = CommandLine.expandOptionFiles(argv, true, true);

        // Parse command line arguments
        C2ACommandLine commandLine = new C2ACommandLine();
        int argCount = commandLine.parse(argv);
        // Need at least: relation name, output file, one input file.
        if (argCount > argv.length - 3) {
            commandLine.printUsage(System.err);
            System.exit(1);
        }
        String relationName = argv[argCount++];
        String outputFileName = argv[argCount++];

        // Create the converter
        ConvertToARFF converter = commandLine.getConverter();
        if (converter.getNumAttributes() == 0) {
            throw new IllegalArgumentException("No attributes specified!");
        }

        // Open output file
        Writer out = new OutputStreamWriter(new BufferedOutputStream(
                new FileOutputStream(outputFileName)));

        // Close the writer even if reading or converting an input fails.
        try {
            // Read documents,
            // scan documents to find ranges of attributes
            List<DataFile> dataFileList = new ArrayList<DataFile>();
            while (argCount < argv.length) {
                String fileName = argv[argCount++];

                // Read input file as dom4j tree
                SAXReader reader = new SAXReader();
                Document document = reader.read(fileName);

                DataFile dataFile = new DataFile(document, converter.toAppName(fileName));
                dataFileList.add(dataFile);

                converter.scan(dataFile.getDocument(), dataFile.getAppName());
            }

            // Generate ARFF header
            converter.generateHeader(relationName, out);

            // Generate instances from each document
            for (DataFile dataFile : dataFileList) {
                converter.generateInstances(dataFile.getDocument(), dataFile.getAppName(), out);
            }
        } finally {
            out.close();
        }
    }
}
// vim:ts=4
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -