📄 xmlinstances.java
字号:
/* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *//* * XMLInstances.java * Copyright (C) 2006 University of Waikato, Hamilton, New Zealand */package weka.core.xml;import weka.core.Attribute;import weka.core.FastVector;import weka.core.Instance;import weka.core.Instances;import weka.core.ProtectedProperties;import weka.core.SparseInstance;import weka.core.Utils;import weka.core.Version;import java.io.BufferedReader;import java.io.FileInputStream;import java.io.InputStream;import java.io.InputStreamReader;import java.io.Reader;import java.io.Serializable;import java.util.Enumeration;import java.util.Properties;import java.util.Vector;import java.util.zip.GZIPInputStream;import org.w3c.dom.Element;/** * XML representation of the Instances class. * * @author fracpete (fracpete at waikato dot ac dot nz) * @version $Revision: 1.2 $ */public class XMLInstances extends XMLDocument implements Serializable { /** for serialization */ private static final long serialVersionUID = 3626821327547416099L; /** The filename extension that should be used for xrff files */ public static String FILE_EXTENSION = ".xrff"; // tags /** the root element */ public final static String TAG_DATASET = "dataset"; /** the header element */ public final static String TAG_HEADER = "header"; /** the body element */ public final static String TAG_BODY = "body"; /** the notes element */ public final static String TAG_NOTES = "notes"; /** the attributes element */ public final static String TAG_ATTRIBUTES = "attributes"; /** the attribute element */ public final static String TAG_ATTRIBUTE = "attribute"; /** the labels element */ public final static String TAG_LABELS = "labels"; /** the label element */ public final static String TAG_LABEL = "label"; /** the meta-data element */ public final static String TAG_METADATA = "metadata"; /** the property element */ public final static String TAG_PROPERTY = "property"; /** the data element */ public final static String TAG_INSTANCES = "instances"; /** the instance element */ public final static String TAG_INSTANCE = "instance"; /** the value element */ public final static String TAG_VALUE = "value"; // attributes /** the version attribute */ public final static String ATT_VERSION = "version"; /** the type attribute */ public final static String ATT_TYPE = "type"; /** the format attribute (for date attributes) */ public final static String ATT_FORMAT = "format"; /** the class attribute */ public final static String ATT_CLASS = "class"; /** the index attribute */ public final static String ATT_INDEX = "index"; /** the weight attribute */ public final static String ATT_WEIGHT = "weight"; /** the missing attribute */ public final static String ATT_MISSING = "missing"; // values /** the value for numeric */ public final static String VAL_NUMERIC = "numeric"; /** the value for date */ public final static String VAL_DATE = "date"; /** the value for nominal */ public final static String VAL_NOMINAL = "nominal"; /** the value for string */ public final static String VAL_STRING = "string"; /** the value for relational */ public final static String VAL_RELATIONAL = "relational"; /** the value for normal */ public final static String VAL_NORMAL = "normal"; /** the value for sparse */ public final static String VAL_SPARSE = "sparse"; /** the DTD */ public final static String DOCTYPE = "<!" + DTD_DOCTYPE + " " + TAG_DATASET + "\n" + "[\n" + " <!" + DTD_ELEMENT + " " + TAG_DATASET + " (" + TAG_HEADER + "," + TAG_BODY + ")" + ">\n" + " <!" + DTD_ATTLIST + " " + TAG_DATASET + " " + ATT_NAME + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n" + " <!" + DTD_ATTLIST + " " + TAG_DATASET + " " + ATT_VERSION + " " + DTD_CDATA + " \"" + Version.VERSION + "\">\n" + "\n" + " <!" + DTD_ELEMENT + " " + TAG_HEADER + " (" + TAG_NOTES + DTD_OPTIONAL + "," + TAG_ATTRIBUTES + ")" + ">\n" + " <!" + DTD_ELEMENT + " " + TAG_BODY + " (" + TAG_INSTANCES + ")" + ">\n" + " <!" + DTD_ELEMENT + " " + TAG_NOTES + " " + DTD_ANY + "> <!-- comments, information, copyright, etc. -->\n" + "\n" + " <!" + DTD_ELEMENT + " " + TAG_ATTRIBUTES + " (" + TAG_ATTRIBUTE + DTD_AT_LEAST_ONE + ")" + ">\n" + " <!" + DTD_ELEMENT + " " + TAG_ATTRIBUTE + " (" + TAG_LABELS + DTD_OPTIONAL + "," + TAG_METADATA + DTD_OPTIONAL + "," + TAG_ATTRIBUTES + DTD_OPTIONAL + ")" + ">\n" + " <!" + DTD_ATTLIST + " " + TAG_ATTRIBUTE + " " + ATT_NAME + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n" + " <!" + DTD_ATTLIST + " " + TAG_ATTRIBUTE + " " + ATT_TYPE + " (" + VAL_NUMERIC + DTD_SEPARATOR + VAL_DATE + DTD_SEPARATOR + VAL_NOMINAL + DTD_SEPARATOR + VAL_STRING + DTD_SEPARATOR + VAL_RELATIONAL + ") " + DTD_REQUIRED + ">\n" + " <!" + DTD_ATTLIST + " " + TAG_ATTRIBUTE + " " + ATT_FORMAT + " " + DTD_CDATA + " " + DTD_IMPLIED + ">\n" + " <!" + DTD_ATTLIST + " " + TAG_ATTRIBUTE + " " + ATT_CLASS + " (" + VAL_YES + DTD_SEPARATOR + VAL_NO + ") \"" + VAL_NO + "\"" + ">\n" + " <!" + DTD_ELEMENT + " " + TAG_LABELS + " (" + TAG_LABEL + DTD_ZERO_OR_MORE + ")" + "> <!-- only for type \"nominal\" -->\n" + " <!" + DTD_ELEMENT + " " + TAG_LABEL + " " + DTD_ANY + ">\n" + " <!" + DTD_ELEMENT + " " + TAG_METADATA + " (" + TAG_PROPERTY + DTD_ZERO_OR_MORE + ")" + ">\n" + " <!" + DTD_ELEMENT + " " + TAG_PROPERTY + " " + DTD_ANY + ">\n" + " <!" + DTD_ATTLIST + " " + TAG_PROPERTY + " " + ATT_NAME + " " + DTD_CDATA + " " + DTD_REQUIRED + ">\n" + "\n" + " <!" + DTD_ELEMENT + " " + TAG_INSTANCES + " (" + TAG_INSTANCE + DTD_ZERO_OR_MORE + ")" + ">\n" + " <!" + DTD_ELEMENT + " " + TAG_INSTANCE + " (" + TAG_VALUE + DTD_ZERO_OR_MORE + ")" + ">\n" + " <!" + DTD_ATTLIST + " " + TAG_INSTANCE + " " + ATT_TYPE + " (" + VAL_NORMAL + DTD_SEPARATOR + VAL_SPARSE + ") \"" + VAL_NORMAL + "\"" + ">\n" + " <!" + DTD_ATTLIST + " " + TAG_INSTANCE + " " + ATT_WEIGHT + " " + DTD_CDATA + " " + DTD_IMPLIED + ">\n" + " <!" + DTD_ELEMENT + " " + TAG_VALUE + " (" + DTD_PCDATA + DTD_SEPARATOR + TAG_INSTANCES + ")" + DTD_ZERO_OR_MORE + ">\n" + " <!" + DTD_ATTLIST + " " + TAG_VALUE + " " + ATT_INDEX + " " + DTD_CDATA + " " + DTD_IMPLIED + "> <!-- 1-based index (only used for instance format \"sparse\") -->\n" + " <!" + DTD_ATTLIST + " " + TAG_VALUE + " " + ATT_MISSING + " (" + VAL_YES + DTD_SEPARATOR + VAL_NO + ") \"" + VAL_NO + "\"" + ">\n" + "]\n" + ">"; /** the precision for numbers */ protected int m_Precision = 6; /** the underlying Instances */ protected Instances m_Instances; /** * the default constructor * * @throws Exception if XML initialization fails */ public XMLInstances() throws Exception { super(); m_Instances = null; setDocType(DOCTYPE); setRootNode(TAG_DATASET); setValidating(true); } /** * generates the XML structure based on the given data * * @param data the data to build the XML structure from * @throws Exception if initialization/generation fails */ public XMLInstances(Instances data) throws Exception { this(); setInstances(data); } /** * generates the Instances directly from the reader containing the * XML data. * * @param reader the reader for the XML data * @throws Exception if something goes wrong */ public XMLInstances(Reader reader) throws Exception { this(); setXML(reader); } /** * adds the attribute to the XML structure * * @param parent the parent node to add the attribute node as child * @param att the attribute to add */ protected void addAttribute(Element parent, Attribute att) { Element node; Element child; Element property; Element label; String tmpStr; Enumeration enm; int i; node = m_Document.createElement(TAG_ATTRIBUTE); parent.appendChild(node); // XML attributes // name node.setAttribute(ATT_NAME, att.name()); // type switch (att.type()) { case Attribute.NUMERIC: node.setAttribute(ATT_TYPE, VAL_NUMERIC); break; case Attribute.DATE: node.setAttribute(ATT_TYPE, VAL_DATE); break; case Attribute.NOMINAL: node.setAttribute(ATT_TYPE, VAL_NOMINAL); break; case Attribute.STRING: node.setAttribute(ATT_TYPE, VAL_STRING); break; case Attribute.RELATIONAL: node.setAttribute(ATT_TYPE, VAL_RELATIONAL); break; default: node.setAttribute(ATT_TYPE, "???"); } // labels if (att.isNominal()) { child = m_Document.createElement(TAG_LABELS); node.appendChild(child); enm = att.enumerateValues(); while (enm.hasMoreElements()) { tmpStr = enm.nextElement().toString(); label = m_Document.createElement(TAG_LABEL); child.appendChild(label); label.appendChild(m_Document.createTextNode(validContent(tmpStr))); } } // format if (att.isDate()) node.setAttribute(ATT_FORMAT, validContent(att.getDateFormat())); // class if (m_Instances.classIndex() > -1) { if (att == m_Instances.classAttribute()) node.setAttribute(ATT_CLASS, VAL_YES); } // add meta-data if ( (att.getMetadata() != null) && (att.getMetadata().size() > 0) ) { child = m_Document.createElement(TAG_METADATA); node.appendChild(child); enm = att.getMetadata().propertyNames(); while (enm.hasMoreElements()) { tmpStr = enm.nextElement().toString(); property = m_Document.createElement(TAG_PROPERTY); child.appendChild(property); property.setAttribute(ATT_NAME, tmpStr); property.appendChild(m_Document.createTextNode(validContent(att.getMetadata().getProperty(tmpStr, "")))); } } // relational attribute? if (att.isRelationValued()) { child = m_Document.createElement(TAG_ATTRIBUTES); node.appendChild(child); for (i = 0; i < att.relation().numAttributes(); i++) addAttribute(child, att.relation().attribute(i)); } } /** * turns all <, > and &into character entities and returns that * string. Necessary for TextNodes. * * @param content string to convert * @return the valid content string */ protected String validContent(String content) { String result; result = content; // these five entities are recognized by every XML processor // see http://www.xml.com/pub/a/2001/03/14/trxml10.html result = result.replaceAll("&", "&") .replaceAll("\"", """) .replaceAll("'", "'") .replaceAll("<", "<") .replaceAll(">", ">"); // in addition, replace some other entities as well result = result.replaceAll("\n", " ") .replaceAll("\r", " ") .replaceAll("\t", "	"); return result; } /** * adds the instance to the XML structure * * @param parent the parent node to add the instance node as child * @param inst the instance to add */ protected void addInstance(Element parent, Instance inst) { Element node; Element value; Element child; boolean sparse; int i; int n; int index; node = m_Document.createElement(TAG_INSTANCE); parent.appendChild(node); // sparse? sparse = (inst instanceof SparseInstance); if (sparse) node.setAttribute(ATT_TYPE, VAL_SPARSE); // weight if (inst.weight() != 1.0) node.setAttribute(ATT_WEIGHT, Utils.doubleToString(inst.weight(), m_Precision)); // values for (i = 0; i < inst.numValues(); i++) { index = inst.index(i); value = m_Document.createElement(TAG_VALUE); node.appendChild(value); if (inst.isMissing(index)) { value.setAttribute(ATT_MISSING, VAL_YES); } else { if (inst.attribute(index).isRelationValued()) { child = m_Document.createElement(TAG_INSTANCES); value.appendChild(child); for (n = 0; n < inst.relationalValue(i).numInstances(); n++) addInstance(child, inst.relationalValue(i).instance(n)); } else { if (inst.attribute(index).type() == Attribute.NUMERIC) value.appendChild(m_Document.createTextNode(Utils.doubleToString(inst.value(index), m_Precision))); else value.appendChild(m_Document.createTextNode(validContent(inst.stringValue(index)))); } } if (sparse) value.setAttribute(ATT_INDEX, "" + (index+1)); } } /** * generates the XML structure for the header */ protected void headerToXML() { Element root; Element node; Element child; int i; root = m_Document.getDocumentElement(); root.setAttribute(ATT_NAME, m_Instances.relationName()); root.setAttribute(ATT_VERSION, Version.VERSION); // create "header" node node = m_Document.createElement(TAG_HEADER); root.appendChild(node); // add all attributes child = m_Document.createElement(TAG_ATTRIBUTES); node.appendChild(child); for (i = 0; i < m_Instances.numAttributes(); i++) addAttribute(child, m_Instances.attribute(i)); } /** * generates the XML structure from the rows */ protected void dataToXML() { Element root; Element node; Element child; int i; root = m_Document.getDocumentElement(); // create "body" node node = m_Document.createElement(TAG_BODY); root.appendChild(node);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -