📄 crawlsettingssaxhandler.java
字号:
/* * CrawlSettingsSAXHandler * * $Id: CrawlSettingsSAXHandler.java 5111 2007-05-03 01:43:43Z gojomo $ * * Created on Dec 8, 2003 * * Copyright (C) 2004 Internet Archive. * * This file is part of the Heritrix web crawler (crawler.archive.org). * * Heritrix is free software; you can redistribute it and/or modify it under the * terms of the GNU Lesser Public License as published by the Free Software * Foundation; either version 2.1 of the License, or any later version. * * Heritrix is distributed in the hope that it will be useful, but WITHOUT ANY * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR * A PARTICULAR PURPOSE. See the GNU Lesser Public License for more details. * * You should have received a copy of the GNU Lesser Public License along with * Heritrix; if not, write to the Free Software Foundation, Inc., 59 Temple * Place, Suite 330, Boston, MA 02111-1307 USA */package org.archive.crawler.settings;import java.lang.reflect.InvocationTargetException;import java.text.ParseException;import java.util.HashMap;import java.util.Map;import java.util.Stack;import java.util.logging.Level;import java.util.logging.Logger;import javax.management.Attribute;import javax.management.AttributeNotFoundException;import javax.management.InvalidAttributeValueException;import org.archive.crawler.settings.Constraint.FailedCheck;import org.archive.crawler.settings.refinements.PortnumberCriteria;import org.archive.crawler.settings.refinements.Refinement;import org.archive.crawler.settings.refinements.RegularExpressionCriteria;import org.archive.crawler.settings.refinements.TimespanCriteria;import org.archive.util.ArchiveUtils;import org.xml.sax.Attributes;import org.xml.sax.Locator;import org.xml.sax.SAXException;import org.xml.sax.SAXParseException;import org.xml.sax.helpers.DefaultHandler;/** * An SAX element handler that updates a CrawlerSettings object. * * This is a helper class for the XMLSettingsHandler. 
* * @author John Erik Halse */
public class CrawlSettingsSAXHandler extends DefaultHandler implements
        ValueErrorHandler {

    private static Logger logger = Logger
            .getLogger("org.archive.crawler.settings.XMLSettingsHandler");

    /** Position information from the parser; stays null if none is supplied. */
    private Locator locator;

    /** The settings object updated while parsing. */
    private CrawlerSettings settings;

    /** Settings handler obtained from {@link #settings} in the constructor. */
    private SettingsHandler settingsHandler;

    /** Element name (qName) to the handler responsible for that element. */
    private Map<String,ElementHandler> handlers =
        new HashMap<String,ElementHandler>();

    /**
     * Handlers of the currently open elements, innermost on top. Holds a
     * null placeholder for every open element that has no handler.
     */
    private Stack<ElementHandler> handlerStack = new Stack<ElementHandler>();

    /** Work stack for element handlers defined elsewhere in this class. */
    private Stack<Object> stack = new Stack<Object>();

    /** Keeps track of elements which subelements should be skipped. */
    private Stack<Boolean> skip = new Stack<Boolean>();

    /** Collects character data until the next end tag. */
    private StringBuffer buffer = new StringBuffer();

    /** Trimmed character data of the element that ended most recently. */
    private String value;

    /**
     * Creates a new CrawlSettingsSAXHandler.
     *
     * @param settings the settings object that should be updated from this
     *            handler.
     */
    public CrawlSettingsSAXHandler(CrawlerSettings settings) {
        super();
        this.settings = settings;
        this.settingsHandler = settings.getSettingsHandler();

        // Document roots.
        handlers.put(XMLSettingsHandler.XML_ROOT_ORDER, new RootHandler());
        handlers.put(XMLSettingsHandler.XML_ROOT_HOST_SETTINGS,
                new RootHandler());
        handlers.put(XMLSettingsHandler.XML_ROOT_REFINEMENT,
                new RootHandler());

        // Modules.
        handlers.put(XMLSettingsHandler.XML_ELEMENT_CONTROLLER,
                new ModuleHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_OBJECT,
                new ModuleHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_NEW_OBJECT,
                new NewModuleHandler());

        // Meta information.
        handlers.put(XMLSettingsHandler.XML_ELEMENT_META, new MetaHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_NAME, new NameHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_DESCRIPTION,
                new DescriptionHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_OPERATOR,
                new OperatorHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_ORGANIZATION,
                new OrganizationHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_AUDIENCE,
                new AudienceHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_DATE, new DateHandler());

        // Maps and lists.
        handlers.put(SettingsHandler.MAP, new MapHandler());
        handlers.put(SettingsHandler.INTEGER_LIST, new ListHandler());
        handlers.put(SettingsHandler.STRING_LIST, new ListHandler());
        handlers.put(SettingsHandler.DOUBLE_LIST, new ListHandler());
        handlers.put(SettingsHandler.FLOAT_LIST, new ListHandler());
        handlers.put(SettingsHandler.LONG_LIST, new ListHandler());

        // Simple typed values.
        handlers.put(SettingsHandler.STRING, new SimpleElementHandler());
        handlers.put(SettingsHandler.TEXT, new SimpleElementHandler());
        handlers.put(SettingsHandler.INTEGER, new SimpleElementHandler());
        handlers.put(SettingsHandler.FLOAT, new SimpleElementHandler());
        handlers.put(SettingsHandler.LONG, new SimpleElementHandler());
        handlers.put(SettingsHandler.BOOLEAN, new SimpleElementHandler());
        handlers.put(SettingsHandler.DOUBLE, new SimpleElementHandler());

        // Refinements and their criteria.
        handlers.put(XMLSettingsHandler.XML_ELEMENT_REFINEMENTLIST,
                new RefinementListHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_REFINEMENT,
                new RefinementHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_REFERENCE,
                new ReferenceHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_LIMITS,
                new LimitsHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_TIMESPAN,
                new TimespanHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_PORTNUMBER,
                new PortnumberHandler());
        handlers.put(XMLSettingsHandler.XML_ELEMENT_URIMATCHES,
                new URIMatcherHandler());
    }

    /**
     * Remembers the parser's locator so that messages can report a position.
     *
     * @param locator the locator offered by the parser.
     * @see org.xml.sax.ContentHandler#setDocumentLocator(org.xml.sax.Locator)
     */
    @Override
    public void setDocumentLocator(Locator locator) {
        super.setDocumentLocator(locator);
        this.locator = locator;
    }

    /**
     * Registers this object as value error handler and seeds the skip stack
     * with "not skipping" for the document level.
     *
     * @throws SAXException if the superclass implementation throws it.
     * @see org.xml.sax.ContentHandler#startDocument()
     */
    @Override
    public void startDocument() throws SAXException {
        settingsHandler.registerValueErrorHandler(this);
        skip.push(Boolean.FALSE);
        super.startDocument();
    }

    /**
     * Unregisters this object as value error handler.
     *
     * @throws SAXException if the superclass implementation throws it.
     * @see org.xml.sax.ContentHandler#endDocument()
     */
    @Override
    public void endDocument() throws SAXException {
        settingsHandler.unregisterValueErrorHandler(this);
        super.endDocument();
    }

    /**
     * Appends character data to the buffer that is read when the enclosing
     * element ends.
     *
     * @param ch the characters.
     * @param start the start position in the character array.
     * @param length the number of characters to use.
     * @throws SAXException if the superclass implementation throws it.
     * @see org.xml.sax.ContentHandler#characters(char[], int, int)
     */
    @Override
    public void characters(char[] ch, int start, int length)
            throws SAXException {
        super.characters(ch, start, length);
        buffer.append(ch, start, length);
    }

    /**
     * Start of an element. Decide what handler to use, and call it.
     *
     * Exactly one entry is pushed on both the handler stack and the skip
     * stack for every element, known or not, because endElement pops one
     * entry from each for every end tag.
     *
     * @param uri the namespace URI; not used here.
     * @param localName the local name; not used here.
     * @param qName the qualified name, used to look up the handler.
     * @param attributes the attributes attached to the element.
     * @throws SAXException if the handler fails with an error that is not
     *             one of the two causes treated as "skip this subtree".
     */
    @Override
    public void startElement(String uri, String localName, String qName,
            Attributes attributes) throws SAXException {
        ElementHandler handler = handlers.get(qName);

        if (handler == null) {
            // Unknown element: push a null placeholder (endElement ignores
            // null handlers) and inherit the parent's skip state, so the
            // matching end tag does not pop the parent's entries.
            handlerStack.push(null);
            skip.push(skip.peek());

            // A parser is not obliged to supply a locator, so only add the
            // position details when one was set.
            String tmp = "Unknown element '" + qName + "'";
            if (locator != null) {
                tmp += " in '" + locator.getSystemId() + "', line: "
                        + locator.getLineNumber() + ", column: "
                        + locator.getColumnNumber();
            }
            // Logged once; the message used to be emitted twice whenever an
            // order with a controller was present.
            logger.warning(tmp);
            return;
        }

        handlerStack.push(handler);

        if (skip.peek().booleanValue()) {
            // The parent is being skipped, so this element is skipped too.
            skip.push(Boolean.TRUE);
            String moduleName = attributes
                    .getValue(XMLSettingsHandler.XML_ATTRIBUTE_NAME);
            logger.fine("Skipping: " + qName + " " + moduleName);
            return;
        }

        try {
            handler.startElement(qName, attributes);
            skip.push(Boolean.FALSE);
        } catch (SAXException e) {
            // These two causes mean the element's subtree is skipped
            // instead of aborting the parse; anything else is rethrown.
            Exception cause = e.getException();
            boolean skipSubtree = cause instanceof InvocationTargetException
                    || cause instanceof AttributeNotFoundException;
            skip.push(Boolean.valueOf(skipSubtree));
            if (!skipSubtree) {
                throw e;
            }
        }
    }

    /**
     * End of an element.
* * @param uri * @param localName * @param qName * @throws SAXException */ public void endElement(String uri, String localName, String qName) throws SAXException { value = buffer.toString().trim(); buffer.setLength(0); ElementHandler handler = (ElementHandler) handlerStack.pop(); if (!((Boolean) skip.pop()).booleanValue()) { if (handler != null) { handler.endElement(qName); } } } public void illegalElementError(String name) throws SAXParseException { throw new SAXParseException("Element '" + name + "' not allowed here", locator); } /** * Superclass of all the elementhandlers. * * This class should be subclassed for the different XML-elements. * * @author John Erik Halse */ private class ElementHandler { /** * Start of an element * * @param name * @param atts * @throws SAXException */ public void startElement(String name, Attributes atts) throws SAXException { } /** * End of an element * * @param name * @throws SAXException */ public void endElement(String name) throws SAXException { } } /** * Handle the root element. * * This class checks that the root element is of the right type. 
* * @author John Erik Halse */
    private class RootHandler extends ElementHandler {

        /**
         * Rejects a root element that does not fit the settings object
         * being filled.
         *
         * @param name the qualified name of the root element.
         * @param atts the attributes of the root element; not used.
         * @throws SAXException a SAXParseException if the document type
         *             does not match the settings object.
         */
        public void startElement(String name, Attributes atts)
                throws SAXException {
            // Check filetype
            // An order document while the settings object has a scope.
            if (name.equals(XMLSettingsHandler.XML_ROOT_ORDER)
                    && settings.getScope() != null) {
                throw wrongDocumentType(name);
            }
            // A host settings document while it has no scope.
            if (name.equals(XMLSettingsHandler.XML_ROOT_HOST_SETTINGS)
                    && settings.getScope() == null) {
                throw wrongDocumentType(name);
            }
            // A refinement document while it is not a refinement.
            if (name.equals(XMLSettingsHandler.XML_ROOT_REFINEMENT)
                    && !settings.isRefinement()) {
                throw wrongDocumentType(name);
            }
        }

        /** Builds the parse error for a mismatching root element. */
        private SAXParseException wrongDocumentType(String name) {
            return new SAXParseException("Wrong document type '" + name + "'",
                    locator);
        }
    }

    // Meta handlers

    /** Handler for the meta element; it adds no behaviour of its own. */
    private class MetaHandler extends ElementHandler {
    }

    /** Stores the element text as the settings name. */
    private class NameHandler extends ElementHandler {

        /**
         * Sets the settings name when the enclosing element's handler is a
         * MetaHandler, and reports an illegal element otherwise.
         *
         * @param name the qualified name of the element.
         * @throws SAXException if the element is not allowed here.
         */
        public void endElement(String name) throws SAXException {
            boolean insideMeta = handlerStack.peek() instanceof MetaHandler;
            if (!insideMeta) {
                illegalElementError(name);
                return; // not reached: illegalElementError always throws
            }
            settings.setName(value);
        }
    }

    private class DescriptionHandler extends ElementHandler {
        public void endElement(String name) throws SAXException {
            if (handlerStack.peek() instanceof MetaHandler) {
                settings.setDescription(value);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -