📄 crawlsettingssaxsource.java
字号:
/* CrawlSettingsSAXSource * * $Id: CrawlSettingsSAXSource.java,v 1.8 2005/03/31 23:49:51 stack-sf Exp $ * * Created on Dec 5, 2003 * * Copyright (C) 2004 Internet Archive. * * This file is part of the Heritrix web crawler (crawler.archive.org). * * Heritrix is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * Heritrix is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with Heritrix; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */package org.archive.crawler.settings;import java.io.IOException;import java.text.ParseException;import java.util.Iterator;import javax.management.AttributeNotFoundException;import javax.management.MBeanInfo;import javax.xml.transform.sax.SAXSource;import org.archive.crawler.settings.refinements.PortnumberCriteria;import org.archive.crawler.settings.refinements.Refinement;import org.archive.crawler.settings.refinements.RegularExpressionCriteria;import org.archive.crawler.settings.refinements.TimespanCriteria;import org.archive.util.ArchiveUtils;import org.xml.sax.Attributes;import org.xml.sax.ContentHandler;import org.xml.sax.DTDHandler;import org.xml.sax.EntityResolver;import org.xml.sax.ErrorHandler;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.SAXNotRecognizedException;import org.xml.sax.SAXNotSupportedException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.AttributesImpl;/** Class that takes a CrawlerSettings object and create SAXEvents from it. * * This is a helper class for XMLSettingsHandler. 
* * @author John Erik Halse */
public class CrawlSettingsSAXSource extends SAXSource implements XMLReader {

    /** Number of characters added per nesting level when pretty-printing the XML file. */
    private static final int indentAmount = 2;

    /** Settings object the SAX events are generated from. */
    private CrawlerSettings settings;

    /** Receiver of the generated SAX events; {@link #parse(InputSource)} fails if unset. */
    private ContentHandler handler;

    /** Set when the settings have no parent; selects the order root element in parse. */
    private boolean orderFile = false;

    /** Constructs a new CrawlSettingsSAXSource.
     *
     * @param settings the settings object to create SAX events from.
     */
    public CrawlSettingsSAXSource(CrawlerSettings settings) {
        super();
        this.settings = settings;
        if (settings.getParent() == null) {
            orderFile = true;
        }
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#getFeature(java.lang.String)
     */
    public boolean getFeature(String name)
            throws SAXNotRecognizedException, SAXNotSupportedException {
        // Every feature is reported as disabled.
        return false;
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)
     */
    public void setFeature(String name, boolean value)
            throws SAXNotRecognizedException, SAXNotSupportedException {
        // Intentionally a no-op: feature requests are silently ignored.
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#getProperty(java.lang.String)
     */
    public Object getProperty(String name)
            throws SAXNotRecognizedException, SAXNotSupportedException {
        // No properties are kept.
        return null;
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)
     */
    public void setProperty(String name, Object value)
            throws SAXNotRecognizedException, SAXNotSupportedException {
        // Intentionally a no-op: property values are silently ignored.
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)
     */
    public void setEntityResolver(EntityResolver resolver) {
        // Intentionally a no-op: the resolver is not stored.
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#getEntityResolver()
     */
    public EntityResolver getEntityResolver() {
        return null;
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)
     */
    public void setDTDHandler(DTDHandler handler) {
        // Intentionally a no-op: the DTD handler is not stored.
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#getDTDHandler()
     */
    public DTDHandler getDTDHandler() {
        return null;
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler)
     */
    public void setContentHandler(ContentHandler handler) {
        this.handler = handler;
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#getContentHandler()
     */
    public ContentHandler getContentHandler() {
        return handler;
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)
     */
    public void setErrorHandler(ErrorHandler handler) {
        // Intentionally a no-op: the error handler is not stored.
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#getErrorHandler()
     */
    public ErrorHandler getErrorHandler() {
        return null;
    }

    // We're not doing namespaces
    private static final String nsu = ""; // NamespaceURI

    /*
     * Widest run of spaces indentArray can supply. The code in view asks for
     * up to 1 + 3 * indentAmount characters; nested elements written
     * elsewhere in this class may ask for more.
     * NOTE(review): 80 is a generous assumption -- confirm against the
     * deepest settings tree that has to be written.
     */
    private static final int maxIndentWidth = 80;

    /*
     * A newline followed by spaces. Callers pass (indentArray, 0, n) to emit
     * the line break plus n - 1 spaces, so the array must hold at least as
     * many characters as the largest n requested.
     */
    private static final char[] indentArray = createIndentArray(maxIndentWidth);

    /* Builds a buffer holding one newline followed by width spaces. */
    private static char[] createIndentArray(int width) {
        char[] buffer = new char[width + 1];
        buffer[0] = '\n';
        for (int i = 1; i < buffer.length; i++) {
            buffer[i] = ' ';
        }
        return buffer;
    }

    /* (non-Javadoc)
     * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)
     *
     * The input argument is not read: the events are generated from the
     * settings object handed to the constructor. Throws a SAXException when
     * no content handler has been set.
     */
    public void parse(InputSource input) throws IOException, SAXException {
        if (handler == null) {
            throw new SAXException("No content handler");
        }
        handler.startDocument();

        // Attributes of the root element: the xsi namespace declaration and
        // the schema location.
        AttributesImpl atts = new AttributesImpl();
        atts.addAttribute(
            "http://www.w3.org/2001/XMLSchema-instance",
            "xsi",
            "xmlns:xsi",
            nsu,
            "http://www.w3.org/2001/XMLSchema-instance");
        atts.addAttribute(
            "http://www.w3.org/2001/XMLSchema-instance",
            "noNamespaceSchemaLocation",
            "xsi:noNamespaceSchemaLocation",
            nsu,
            XMLSettingsHandler.XML_SCHEMA);

        // The root element name depends on the kind of settings: refinement
        // first, then order file (no parent), otherwise host settings.
        String rootElement;
        if (settings.isRefinement()) {
            rootElement = XMLSettingsHandler.XML_ROOT_REFINEMENT;
        } else if (orderFile) {
            rootElement = XMLSettingsHandler.XML_ROOT_ORDER;
        } else {
            rootElement = XMLSettingsHandler.XML_ROOT_HOST_SETTINGS;
        }
        handler.startElement(nsu, rootElement, rootElement, atts);

        // The "1 +" accounts for the newline that leads indentArray.
        parseMetaData(1 + indentAmount);

        if (settings.hasRefinements()) {
            parseRefinements(1 + indentAmount);
        }

        // Write the modules
        Iterator modules = settings.topLevelModules();
        while (modules.hasNext()) {
            ComplexType complexType = (ComplexType) modules.next();
            parseComplexType(complexType, 1 + indentAmount);
        }

        // A bare newline before the closing root tag and after it.
        handler.ignorableWhitespace(indentArray, 0, 1);
        handler.endElement(nsu, rootElement, rootElement);
        handler.ignorableWhitespace(indentArray, 0, 1);
        handler.endDocument();
    }

    /**
     * Emits the refinement list element with one child element per
     * refinement of the settings object, each holding its description and
     * its limits.
     *
     * @param indent number of characters of indentArray (the leading newline
     *            included) written before the list element.
     * @throws SAXException if the content handler rejects an event.
     */
    private
void parseRefinements(int indent) throws SAXException {
        final Attributes noAtts = new AttributesImpl();

        // Element names and indentation depths that recur below.
        final String listTag = XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST;
        final String itemTag = XMLSettingsHandler.XML_ELEMENT_REFINEMENT;
        final String referenceName = XMLSettingsHandler.XML_ELEMENT_REFERENCE;
        final int itemIndent = indent + indentAmount;
        final int childIndent = indent + 2 * indentAmount;

        // Open the list element on its own indented line.
        handler.ignorableWhitespace(indentArray, 0, indent);
        handler.startElement(nsu, listTag, listTag, noAtts);

        for (Iterator refinements = settings.refinementsIterator();
                refinements.hasNext();) {
            Refinement refinement = (Refinement) refinements.next();

            // Each refinement element carries its reference as an attribute.
            handler.ignorableWhitespace(indentArray, 0, itemIndent);
            AttributesImpl referenceAtts = new AttributesImpl();
            referenceAtts.addAttribute(nsu, referenceName, referenceName, nsu,
                    refinement.getReference());
            handler.startElement(nsu, itemTag, itemTag, referenceAtts);

            // Children: the description, then the limits.
            writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DESCRIPTION,
                    refinement.getDescription(), noAtts, childIndent);
            parseRefinementLimits(refinement, childIndent);

            handler.ignorableWhitespace(indentArray, 0, itemIndent);
            handler.endElement(nsu, itemTag, itemTag);
        }

        // Close the list element at the same depth it was opened.
        handler.ignorableWhitespace(indentArray, 0, indent);
        handler.endElement(nsu, listTag, listTag);
    }

    private void parseRefinementLimits(Refinement refinement, int indent)
            throws SAXException {
        Attributes nullAtts = new AttributesImpl();
        handler.ignorableWhitespace(indentArray, 0, indent);
        handler.startElement(nsu, XMLSettingsHandler.XML_ELEMENT_LIMITS,
                XMLSettingsHandler.XML_ELEMENT_LIMITS, nullAtts);
        Iterator it = refinement.criteriaIterator();
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -