⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 crawlsettingssaxsource.java

📁 最强的爬虫工程
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* CrawlSettingsSAXSource * * $Id: CrawlSettingsSAXSource.java,v 1.8 2005/03/31 23:49:51 stack-sf Exp $ * * Created on Dec 5, 2003 * * Copyright (C) 2004 Internet Archive. * * This file is part of the Heritrix web crawler (crawler.archive.org). * * Heritrix is free software; you can redistribute it and/or modify * it under the terms of the GNU Lesser Public License as published by * the Free Software Foundation; either version 2.1 of the License, or * any later version. * * Heritrix is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License * along with Heritrix; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA */package org.archive.crawler.settings;import java.io.IOException;import java.text.ParseException;import java.util.Iterator;import javax.management.AttributeNotFoundException;import javax.management.MBeanInfo;import javax.xml.transform.sax.SAXSource;import org.archive.crawler.settings.refinements.PortnumberCriteria;import org.archive.crawler.settings.refinements.Refinement;import org.archive.crawler.settings.refinements.RegularExpressionCriteria;import org.archive.crawler.settings.refinements.TimespanCriteria;import org.archive.util.ArchiveUtils;import org.xml.sax.Attributes;import org.xml.sax.ContentHandler;import org.xml.sax.DTDHandler;import org.xml.sax.EntityResolver;import org.xml.sax.ErrorHandler;import org.xml.sax.InputSource;import org.xml.sax.SAXException;import org.xml.sax.SAXNotRecognizedException;import org.xml.sax.SAXNotSupportedException;import org.xml.sax.XMLReader;import org.xml.sax.helpers.AttributesImpl;/** Class that takes a CrawlerSettings object and create SAXEvents from it. * * This is a helper class for XMLSettingsHandler. * * @author John Erik Halse */public class CrawlSettingsSAXSource extends SAXSource implements XMLReader {    // for prettyprinting XML file    private static final int indentAmount = 2;    private CrawlerSettings settings;    private ContentHandler handler;    private boolean orderFile = false;    /** Constructs a new CrawlSettingsSAXSource.     *     * @param settings the settings object to create SAX events from.     */    public CrawlSettingsSAXSource(CrawlerSettings settings) {        super();        this.settings = settings;        if (settings.getParent() == null) {            orderFile = true;        }    }    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#getFeature(java.lang.String)     */    public boolean getFeature(String name)        throws SAXNotRecognizedException, SAXNotSupportedException {        return false;    }    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#setFeature(java.lang.String, boolean)     */    public void setFeature(String name, boolean value)        throws SAXNotRecognizedException, SAXNotSupportedException {    }    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#getProperty(java.lang.String)     */    public Object getProperty(String name)        throws SAXNotRecognizedException, SAXNotSupportedException {        return null;    }    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#setProperty(java.lang.String, java.lang.Object)     */    public void setProperty(String name, Object value)        throws SAXNotRecognizedException, SAXNotSupportedException {    }    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#setEntityResolver(org.xml.sax.EntityResolver)     */    public void setEntityResolver(EntityResolver resolver) {    }    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#getEntityResolver()     */    public EntityResolver getEntityResolver() {        return null;    }    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#setDTDHandler(org.xml.sax.DTDHandler)     */    public void setDTDHandler(DTDHandler handler) {    }    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#getDTDHandler()     */    public DTDHandler getDTDHandler() {        return null;    }    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#setContentHandler(org.xml.sax.ContentHandler)     */    public void setContentHandler(ContentHandler handler) {        this.handler = handler;    }    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#getContentHandler()     */    public ContentHandler getContentHandler() {        return handler;    }    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#setErrorHandler(org.xml.sax.ErrorHandler)     */    public void setErrorHandler(ErrorHandler handler) {    }    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#getErrorHandler()     */    public ErrorHandler getErrorHandler() {        return null;    }    // We're not doing namespaces    private static final String nsu = ""; // NamespaceURI    private static final char[] indentArray =        "\n                                          ".toCharArray();    /* (non-Javadoc)     * @see org.xml.sax.XMLReader#parse(org.xml.sax.InputSource)     */    public void parse(InputSource input) throws IOException, SAXException {        if (handler == null) {            throw new SAXException("No content handler");        }        handler.startDocument();        AttributesImpl atts = new AttributesImpl();        atts.addAttribute(            "http://www.w3.org/2001/XMLSchema-instance",            "xsi",            "xmlns:xsi",            nsu,            "http://www.w3.org/2001/XMLSchema-instance");        atts.addAttribute(            "http://www.w3.org/2001/XMLSchema-instance",            "noNamespaceSchemaLocation",            "xsi:noNamespaceSchemaLocation",            nsu,            XMLSettingsHandler.XML_SCHEMA);        String rootElement;        if (settings.isRefinement()) {            rootElement = XMLSettingsHandler.XML_ROOT_REFINEMENT;        } else if (orderFile) {            rootElement = XMLSettingsHandler.XML_ROOT_ORDER;        } else {            rootElement = XMLSettingsHandler.XML_ROOT_HOST_SETTINGS;        }        handler.startElement(nsu, rootElement, rootElement, atts);        parseMetaData(1 + indentAmount);        if (settings.hasRefinements()) {            parseRefinements(1 + indentAmount);        }        // Write the modules        Iterator modules = settings.topLevelModules();        while (modules.hasNext()) {            ComplexType complexType = (ComplexType) modules.next();            parseComplexType(complexType, 1 + indentAmount);        }        handler.ignorableWhitespace(indentArray, 0, 1);        handler.endElement(nsu, rootElement, rootElement);        handler.ignorableWhitespace(indentArray, 0, 1);        handler.endDocument();    }    private void parseRefinements(int indent) throws SAXException {        Attributes nullAtts = new AttributesImpl();        handler.ignorableWhitespace(indentArray, 0, indent);        handler.startElement(nsu,                XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,                XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST, nullAtts);        Iterator it = settings.refinementsIterator();        while (it.hasNext()) {            Refinement refinement = (Refinement) it.next();            handler.ignorableWhitespace(indentArray, 0, indent + indentAmount);            AttributesImpl reference = new AttributesImpl();            reference.addAttribute(nsu,                    XMLSettingsHandler.XML_ELEMENT_REFERENCE,                    XMLSettingsHandler.XML_ELEMENT_REFERENCE, nsu, refinement                            .getReference());            handler.startElement(nsu,                    XMLSettingsHandler.XML_ELEMENT_REFINEMENT,                    XMLSettingsHandler.XML_ELEMENT_REFINEMENT, reference);            writeSimpleElement(XMLSettingsHandler.XML_ELEMENT_DESCRIPTION,                    refinement.getDescription(), nullAtts, indent + 2                            * indentAmount);            parseRefinementLimits(refinement, indent + 2 * indentAmount);            handler.ignorableWhitespace(indentArray, 0, indent + indentAmount);            handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_REFINEMENT,                    XMLSettingsHandler.XML_ELEMENT_REFINEMENT);        }        handler.ignorableWhitespace(indentArray, 0, indent);        handler.endElement(nsu, XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,                XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST);    }    private void parseRefinementLimits(Refinement refinement, int indent)            throws SAXException {        Attributes nullAtts = new AttributesImpl();        handler.ignorableWhitespace(indentArray, 0, indent);        handler.startElement(nsu, XMLSettingsHandler.XML_ELEMENT_LIMITS,                XMLSettingsHandler.XML_ELEMENT_LIMITS, nullAtts);        Iterator it = refinement.criteriaIterator();

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -