📄 xmlserialization.java
字号:
/*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
/*
* XMLSerialization.java
* Copyright (C) 2004 University of Waikato, Hamilton, New Zealand
*
*/
package weka.core.xml;
import weka.core.Utils;
import java.beans.BeanInfo;
import java.beans.Introspector;
import java.beans.PropertyDescriptor;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.io.Reader;
import java.io.Writer;
import java.lang.reflect.Array;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.util.Enumeration;
import java.util.Hashtable;
import java.util.Vector;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.Text;
import weka.core.Version;
/**
* With this class objects can be serialized to XML instead of into a binary
* format. It uses introspection (cf. beans) to retrieve the data from the
* given object, i.e. it can only access bean-conforming fields automatically.
* <p>
* The generic approach of writing data as XML can be overridden by adding
* custom methods for reading/writing in a derived class
* (cf. <code>m_Properties</code>, <code>m_CustomMethods</code>).<br>
* Custom read and write methods must have the same signature (and also be
* <code>public</code>!) as the <code>readFromXML</code> and <code>writeToXML</code>
* methods. Methods that follow the naming rule <code>read + property name</code>
* are added automatically to the list of methods by the method
* <code>XMLSerializationMethodHandler.addMethods()</code>.
* <p>
* Other properties that do not conform to the bean set/get-methods have to be
* processed manually in a derived class (cf. <code>readPostProcess(Object)</code>,
* <code>writePostProcess(Object)</code>).
* <p>
* For a complete XML serialization/deserialization have a look at the
* <code>KOML</code> class.
* <p>
* If a stored class has a constructor that takes a String to initialize
* (e.g. String or Double) then the content of the tag will be used for the
* constructor, e.g. from
* <pre>&lt;object name="name" class="String" primitive="no"&gt;Smith&lt;/object&gt;</pre>
* "Smith" will be used to instantiate a String object as constructor argument.
* <p>
*
* @see KOML
* @see #fromXML(Document)
* @see #toXML(Object)
* @see #m_Properties
* @see #m_CustomMethods
* @see #readPostProcess(Object)
* @see #writePostProcess(Object)
* @see #readFromXML(Element)
* @see #writeToXML(Element, Object, String)
* @see #addMethods()
*
*
* @author FracPete (fracpete at waikato dot ac dot nz)
* @version $Revision: 1.1 $
*/
public class XMLSerialization {
/** global debugging switch; <code>trace(Throwable, String)</code> only prints
* when this is <code>true</code> */
protected static boolean DEBUG = false;
/** the node that is currently being processed: when writing, this is the
* parent node (something might go wrong while writing the new child); when
* reading, it is the node that is currently being read */
protected Element m_CurrentNode = null;
/** the tag name used for a serialized object */
public final static String TAG_OBJECT = "object";
/** the name of the version attribute */
public final static String ATT_VERSION = "version";
/** the name of the attribute that holds the name of a node */
public final static String ATT_NAME = "name";
/** the name of the attribute that holds the classname of a node */
public final static String ATT_CLASS = "class";
/** the name of the attribute stating whether a node is a primitive (yes/no) */
public final static String ATT_PRIMITIVE = "primitive";
/** the name of the attribute stating whether a node is an array: "no", "yes"
* or, for multi-dimensional arrays, the number of dimensions */
public final static String ATT_ARRAY = "array";
/** the name of the attribute stating whether a node is null (yes/no) */
public final static String ATT_NULL = "null";
/** the value "yes", as used by the primitive, array and null attributes */
public final static String VAL_YES = "yes";
/** the value "no", as used by the primitive, array and null attributes */
public final static String VAL_NO = "no";
/** the value of the name attribute for the root node */
public final static String VAL_ROOT = "__root__";
/** the tag name of the root node of the XML document (identical to
* <code>TAG_OBJECT</code>) */
public final static String ROOT_NODE = TAG_OBJECT;
/** default value for attribute ATT_PRIMITIVE
* @see #ATT_PRIMITIVE */
public final static String ATT_PRIMITIVE_DEFAULT = VAL_NO;
/** default value for attribute ATT_ARRAY
* @see #ATT_ARRAY */
public final static String ATT_ARRAY_DEFAULT = VAL_NO;
/** default value for attribute ATT_NULL
* @see #ATT_NULL */
public final static String ATT_NULL_DEFAULT = VAL_NO;
/** the DOCTYPE for the serialization: an internal DTD that declares the
* object tag (text and/or nested object tags as content) together with its
* attributes; name and class are required, the remaining attributes carry
* the defaults defined above, the version attribute defaults to
* <code>Version.VERSION</code> */
public final static String DOCTYPE =
"<!DOCTYPE " + ROOT_NODE + "\n"
+ "[\n"
+ " <!ELEMENT " + TAG_OBJECT + " (#PCDATA | " + TAG_OBJECT + ")*>\n"
+ " <!ATTLIST " + TAG_OBJECT + " " + ATT_NAME + " CDATA #REQUIRED>\n"
+ " <!ATTLIST " + TAG_OBJECT + " " + ATT_CLASS + " CDATA #REQUIRED>\n"
+ " <!ATTLIST " + TAG_OBJECT + " " + ATT_PRIMITIVE + " CDATA \"" + ATT_PRIMITIVE_DEFAULT + "\">\n"
+ " <!ATTLIST " + TAG_OBJECT + " " + ATT_ARRAY + " CDATA \"" + ATT_ARRAY_DEFAULT + "\"> <!-- the dimensions of the array; no=0, yes=1 -->\n"
+ " <!ATTLIST " + TAG_OBJECT + " " + ATT_NULL + " CDATA \"" + ATT_NULL_DEFAULT + "\">\n"
+ " <!ATTLIST " + TAG_OBJECT + " " + ATT_VERSION + " CDATA \"" + Version.VERSION + "\">\n"
+ "]\n"
+ ">";
/** the XMLDocument that performs the transformation to and from XML */
protected XMLDocument m_Document = null;
/** for handling properties (ignored/allowed) */
protected PropertyHandler m_Properties = null;
/** for handling custom read/write methods */
protected XMLSerializationMethodHandler m_CustomMethods = null;
/** for overriding class names; maps a Class to the classname (String) that
* is to be used for it
* @see #overrideClassname(Class) */
protected Hashtable m_ClassnameOverride = null;
/**
 * Creates a new serializer and brings it into its initial state by
 * delegating to <code>clear()</code>.
 *
 * @throws Exception if <code>clear()</code> fails
 */
public XMLSerialization() throws Exception {
  // the implicit call of the superclass constructor is all that is needed
  clear();
}
/**
 * Prints a debugging message to stdout, prefixed with the topmost stack
 * frame of the given throwable. Nothing is printed unless <code>DEBUG</code>
 * is <code>true</code> and the throwable carries at least one stack frame.
 *
 * @param t a throwable instance, freshly generated in the calling method,
 *          from which the method/line information is taken
 * @param msg the message to print
 * @see #DEBUG
 */
protected void trace(Throwable t, String msg) {
  if (!DEBUG) {
    return;
  }

  StackTraceElement[] frames = t.getStackTrace();
  if (frames.length > 0) {
    System.out.println("trace: " + frames[0] + ": " + msg);
  }
}
/**
 * Resets this serializer to its initial state: a new, validating XML
 * document with the serialization DOCTYPE and root node is generated, the
 * property handler (ignore/allow lists), the handler for the custom
 * read/write methods and the classname overrides are replaced by fresh
 * instances, the current version is stored in the document and the
 * currently processed node is reset to <code>null</code>.
 *
 * @throws Exception if one of the initialization steps fails (presumably the
 *         set up of the new XML document - confirm against XMLDocument)
 */
public void clear() throws Exception {
m_Document = new XMLDocument();
m_Document.setValidating(true);
m_Document.newDocument(DOCTYPE, ROOT_NODE);
m_Properties = new PropertyHandler();
m_CustomMethods = new XMLSerializationMethodHandler(this);
m_ClassnameOverride = new Hashtable();
// java.io.File is sometimes represented as another class:
// - Win32: sun.awt.shell.Win32ShellFolder2
// - Linux: sun.awt.shell.DefaultShellFolder
// -> we set it to "java.io.File"
m_ClassnameOverride.put(java.io.File.class, java.io.File.class.getName());
// must happen after m_Document has been set up, since the version is stored
// in the root element of that document
setVersion(Version.VERSION);
m_CurrentNode = null;
}
/**
 * Stores the given version string in the version attribute of the root
 * element of the current XML document.
 *
 * @param version the version string to store
 * @see #ATT_VERSION
 */
private void setVersion(String version) {
  m_Document.getDocument().getDocumentElement().setAttribute(ATT_VERSION, version);
}
/**
 * Returns the content of the version attribute of the root element of the
 * current XML document, i.e. the WEKA version with which the serialized
 * object was created.
 *
 * @return the version string stored in the document
 * @see Version
 * @see #ATT_VERSION
 */
public String getVersion() {
  Element root = m_Document.getDocument().getDocumentElement();
  return root.getAttribute(ATT_VERSION);
}
/**
 * Compares the version stored in the current document with the one of the
 * current release and reports the outcome on stdout: a warning if the
 * document has no version or if <code>Version.isOlder</code> holds for the
 * stored version, a note if <code>Version.isNewer</code> holds for it.
 * Nothing is printed otherwise.
 */
private void checkVersion() {
  Version current = new Version();
  String stored = getVersion();

  if (stored.equals("")) {
    System.out.println("WARNING: has no version!");
    return;
  }

  if (current.isOlder(stored)) {
    System.out.println("WARNING: loading a newer version (" + stored + " > " + Version.VERSION + ")!");
    return;
  }

  if (current.isNewer(stored)) {
    System.out.println("NOTE: loading an older version (" + stored + " < " + Version.VERSION + ")!");
  }
}
/**
 * Collects the PropertyDescriptors of the given object that offer both a
 * read ("get") and a write ("set") method and that pass the ignore/allow
 * checks of <code>m_Properties</code>. The descriptors are indexed by their
 * display name.
 *
 * @see java.beans.PropertyDescriptor
 * @param o the object to retrieve the descriptors from
 * @return the PropertyDescriptors indexed by name of the property
 * @throws Exception if the introspection fails
 */
protected Hashtable getDescriptors(Object o) throws Exception {
  Hashtable descriptors = new Hashtable();
  PropertyDescriptor[] candidates =
      Introspector.getBeanInfo(o.getClass()).getPropertyDescriptors();

  for (int idx = 0; idx < candidates.length; idx++) {
    PropertyDescriptor candidate = candidates[idx];

    // only properties that can be read AND written are of interest
    if ( (candidate.getReadMethod() == null) || (candidate.getWriteMethod() == null) ) {
      continue;
    }

    String name = candidate.getDisplayName();
    boolean skip =
        m_Properties.isIgnored(name)          // general ignore, without complete path
        || m_Properties.isIgnored(o, name)    // in the ignore list of the class
        || !m_Properties.isAllowed(o, name);  // not an allowed property

    if (!skip) {
      descriptors.put(name, candidate);
    }
  }

  return descriptors;
}
/**
 * Returns the path of the "name" attribute from the topmost ancestor element
 * down to this node (including it); the names are separated by dots. For a
 * node that is part of the document this is the path from the root element.
 * <p>
 * The walk upwards stops as soon as the parent is no longer an element (the
 * document itself, a document fragment or no parent at all), hence a node
 * that has not been attached to the document yet yields the path of its
 * detached subtree instead of raising an exception.
 *
 * @param node the node to get the path for
 * @return the complete "name" path of this node
 */
protected String getPath(Element node) {
  String result = node.getAttribute(ATT_NAME);

  // fully qualified, since org.w3c.dom.Node is not among the file's imports
  org.w3c.dom.Node parent = node.getParentNode();
  while (parent instanceof Element) {
    Element ancestor = (Element) parent;
    result = ancestor.getAttribute(ATT_NAME) + "." + result;
    parent = ancestor.getParentNode();
  }

  return result;
}
/**
 * Converts a boolean into its string representation as used in the XML
 * attributes: <code>VAL_YES</code> for <code>true</code> and
 * <code>VAL_NO</code> for <code>false</code>.
 *
 * @param b the boolean to turn into a string
 * @return the value in string representation
 */
protected String booleanToString(boolean b) {
  return b ? VAL_YES : VAL_NO;
}
/**
 * Turns the given string into a boolean:
 * <ul>
 * <li><code>null</code> and the empty string are considered FALSE</li>
 * <li><code>VAL_YES</code> and "true" (ignoring the case) are considered
 *     TRUE</li>
 * <li>a string that consists only of the digits 0-9 is considered FALSE if
 *     it denotes zero and TRUE otherwise, regardless of its length</li>
 * <li>everything else is considered FALSE</li>
 * </ul>
 *
 * @param s the string to turn into a boolean
 * @return the string as boolean
 */
protected boolean stringToBoolean(String s) {
  if ( (s == null) || (s.length() == 0) ) {
    return false;
  }

  if (s.equals(VAL_YES) || s.equalsIgnoreCase("true")) {
    return true;
  }

  // The digits are inspected directly instead of going through
  // Integer.parseInt(String): parsing throws a NumberFormatException for
  // digit strings that exceed the int range, although such a string is
  // clearly "a number different from zero".
  boolean nonZero = false;
  for (int i = 0; i < s.length(); i++) {
    char c = s.charAt(i);
    if ( (c < '0') || (c > '9') ) {
      return false;  // not a plain, non-negative number
    }
    if (c != '0') {
      nonZero = true;
    }
  }

  return nonZero;
}
/**
 * Appends a new non-array node to the parent with the given parameters.
 * Delegates to the variant that takes the array dimensions, using 0
 * dimensions.
 *
 * @param parent the parent of this node. if it is <code>null</code> the
 *               document root element is used
 * @param name the name of the node
 * @param classname the classname for this node
 * @param primitive whether it is a primitive data type or not (i.e. an object)
 * @return the generated node
 */
protected Element addElement(Element parent, String name, String classname, boolean primitive) {
  // 0 dimensions, i.e. not an array
  final int noArray = 0;
  return addElement(parent, name, classname, primitive, noArray);
}
/**
 * Appends a new node to the parent with the given parameters. Delegates to
 * the variant that takes the null flag, passing <code>false</code> for it.
 *
 * @param parent the parent of this node. if it is <code>null</code> the
 *               document root element is used
 * @param name the name of the node
 * @param classname the classname for this node
 * @param primitive whether it is a primitive data type or not (i.e. an object)
 * @param array the dimensions of the array (0 if not an array)
 * @return the generated node
 */
protected Element addElement(Element parent, String name, String classname, boolean primitive, int array) {
  // the null flag is passed as false
  final boolean isNull = false;
  return addElement(parent, name, classname, primitive, array, isNull);
}
/**
* appends a new node to the parent with the given parameters
*
* @param parent the parent of this node. if it is <code>null</code> the
* document root element is used
* @param name the name of the node
* @param classname the classname for this node
* @param primitive whether it is a primitve data type or not (i.e. an object)
* @param array the dimensions of the array (0 if not an array)
* @param isnull whether it is null
* @return the generated node
*/
protected Element addElement(Element parent, String name, String classname, boolean primitive, int array, boolean isnull) {
Element result;
if (parent == null)
result = m_Document.getDocument().getDocumentElement();
else
result = (Element) parent.appendChild(m_Document.getDocument().createElement(TAG_OBJECT));
// attributes
// mandatory attributes:
result.setAttribute(ATT_NAME, name);
result.setAttribute(ATT_CLASS, classname);
// add following attributes only if necessary, i.e., different from default:
if (!booleanToString(primitive).equals(ATT_PRIMITIVE_DEFAULT))
result.setAttribute(ATT_PRIMITIVE, booleanToString(primitive));
// multi-dimensional array?
if (array > 1) {
result.setAttribute(ATT_ARRAY, Integer.toString(array));
}
// backwards compatible: 0 -> no array ("no"), 1 -> 1-dim. array ("yes")
else {
if (!booleanToString(array == 1).equals(ATT_ARRAY_DEFAULT))
result.setAttribute(ATT_ARRAY, booleanToString(array == 1));
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -