📄 xmlreader.java

📁 本程序用于对页面信息进行提取并分析
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
上一页 12
    /**
     * Return the current entity resolver.
     *
     * @return The current entity resolver, or null if none
     *         has been registered.
     * @see #setEntityResolver
     */
    public EntityResolver getEntityResolver ()
    {
        return mEntityResolver;
    }

    /**
     * Allow an application to register a DTD event handler.
     *
     * <p>If the application does not register a DTD handler, all DTD
     * events reported by the SAX parser will be silently ignored.</p>
     *
     * <p>Applications may register a new or different handler in the
     * middle of a parse, and the SAX parser must begin using the new
     * handler immediately.</p>
     *
     * @param handler The DTD handler.
     * @see #getDTDHandler
     */
    public void setDTDHandler (DTDHandler handler)
    {
        mDTDHandler = handler;
    }

    /**
     * Return the current DTD handler.
     *
     * @return The current DTD handler, or null if none
     *         has been registered.
     * @see #setDTDHandler
     */
    public DTDHandler getDTDHandler ()
    {
        return mDTDHandler;
    }

    /**
     * Allow an application to register a content event handler.
     *
     * <p>If the application does not register a content handler, both
     * <code>parse</code> methods return immediately without reading
     * any input.</p>
     *
     * <p>Applications may register a new or different handler in the
     * middle of a parse, and the SAX parser must begin using the new
     * handler immediately.</p>
     *
     * @param handler The content handler.
     * @see #getContentHandler
     */
    public void setContentHandler (ContentHandler handler)
    {
        mContentHandler = handler;
    }

    /**
     * Return the current content handler.
     *
     * @return The current content handler, or null if none
     *         has been registered.
     * @see #setContentHandler
     */
    public ContentHandler getContentHandler ()
    {
        return mContentHandler;
    }

    /**
     * Allow an application to register an error event handler.
     *
     * <p>If the application does not register an error handler, all
     * error events reported by the SAX parser will be silently
     * ignored; however, normal processing may not continue.  It is
     * highly recommended that all SAX applications implement an
     * error handler to avoid unexpected bugs.</p>
     *
     * <p>Applications may register a new or different handler in the
     * middle of a parse, and the SAX parser must begin using the new
     * handler immediately.</p>
     *
     * @param handler The error handler.
     * @see #getErrorHandler
     */
    public void setErrorHandler (ErrorHandler handler)
    {
        mErrorHandler = handler;
    }

    /**
     * Return the current error handler.
     *
     * @return The current error handler, or null if none
     *         has been registered.
     * @see #setErrorHandler
     */
    public ErrorHandler getErrorHandler ()
    {
        return mErrorHandler;
    }

    ////////////////////////////////////////////////////////////////////
    // Parsing.
    ////////////////////////////////////////////////////////////////////

    /**
     * Parse a document from an input source.
     *
     * <p>Nothing is read if no content handler is registered.</p>
     *
     * <p>The byte stream of the input source (with its declared
     * encoding) is used when present.  If the source carries neither a
     * byte stream nor a character stream but does have a system
     * identifier, the call is delegated to {@link #parse(java.lang.String)}.
     * A source that only carries a character stream is not handled
     * specially here; it is passed on with a null byte stream exactly as
     * before.</p>
     *
     * <p>Applications may not invoke this method while a parse is in
     * progress (they should create a new XMLReader instead for each
     * nested XML document).  Once a parse is complete, an
     * application may reuse the same XMLReader object, possibly with a
     * different input source.</p>
     *
     * <p>During the parse, the XMLReader will provide information
     * about the document through the registered content handler.
     * A <code>SAXException</code> thrown by the content handler, or a
     * <code>ParserException</code> from the underlying parser, is
     * reported to the error handler as a fatal error; when no error
     * handler is registered it is silently dropped.</p>
     *
     * <p>This method is synchronous: it will not return until parsing
     * has ended.  If a client application wants to terminate
     * parsing early, it should throw an exception.</p>
     *
     * @param input The input source for the top-level of the
     *        document.
     * @exception org.xml.sax.SAXException Any SAX exception raised by
     *            the error handler, possibly wrapping another exception.
     * @exception java.io.IOException An IO exception from the parser,
     *            possibly from a byte stream supplied by the application.
     * @see org.xml.sax.InputSource
     * @see #parse(java.lang.String)
     * @see #setContentHandler
     * @see #setErrorHandler
     */
    public void parse (InputSource input)
        throws IOException, SAXException
    {
        if (null == mContentHandler)
            return; // no listener, so there is nothing to do

        // SAX input resolution: with no stream of either kind available,
        // the system identifier is the only way left to reach the document.
        if ((null == input.getByteStream ())
            && (null == input.getCharacterStream ())
            && (null != input.getSystemId ()))
        {
            parse (input.getSystemId ());
            return;
        }

        try
        {
            mParser = new Parser (
                new Lexer (
                    new Page (
                        input.getByteStream (),
                        input.getEncoding ())));
            deliverDocument ();
        }
        catch (ParserException pe)
        {
            reportParserFailure (pe, "");
        }
    }

    /**
     * Parse a document from a system identifier (URI).
     *
     * <p>Nothing is read if no content handler is registered.</p>
     *
     * <p>The identifier is handed directly to the underlying
     * <code>Parser</code>.  If the system identifier is a URL, it must
     * be fully resolved by the application before it is passed in.</p>
     *
     * <p>Errors are reported the same way as in
     * {@link #parse(org.xml.sax.InputSource)}.</p>
     *
     * @param systemId The system identifier (URI).
     * @exception org.xml.sax.SAXException Any SAX exception raised by
     *            the error handler, possibly wrapping another exception.
     * @exception java.io.IOException An IO exception from the parser.
     * @see #parse(org.xml.sax.InputSource)
     */
    public void parse (String systemId)
        throws IOException, SAXException
    {
        if (null == mContentHandler)
            return; // no listener, so there is nothing to do

        try
        {
            mParser = new Parser (systemId);
            deliverDocument ();
        }
        catch (ParserException pe)
        {
            reportParserFailure (pe, systemId);
        }
    }

    /**
     * Run the already constructed {@link #mParser} and feed its nodes to
     * the content handler, bracketed by startDocument/endDocument.
     *
     * <p>A <code>SAXException</code> escaping from the content handler is
     * turned into a fatal error on the error handler (or dropped when
     * there is none); it ends the traversal.</p>
     *
     * @exception IOException Declared for the benefit of the callers'
     *            signatures.
     * @exception ParserException If the underlying parser fails.
     * @exception SAXException If the error handler throws.
     */
    private void deliverDocument ()
        throws IOException, ParserException, SAXException
    {
        Locator locator = new Locator (mParser);
        ParserFeedback feedback;

        // Route parser diagnostics to the error handler when there is one,
        // otherwise keep the parser quiet.
        if (null != mErrorHandler)
            feedback = new Feedback (mErrorHandler, locator);
        else
            feedback = new DefaultParserFeedback (DefaultParserFeedback.QUIET);
        mParser.setFeedback (feedback);

        mContentHandler.setDocumentLocator (locator);
        try
        {
            mContentHandler.startDocument ();
            for (NodeIterator iterator = mParser.elements (); iterator.hasMoreNodes (); )
                doSAX (iterator.nextNode ());
            mContentHandler.endDocument ();
        }
        catch (SAXException se)
        {
            if (null != mErrorHandler)
                mErrorHandler.fatalError (
                    new SAXParseException ("contentHandler threw me", locator, se));
        }
    }

    /**
     * Report a parser failure to the error handler as a fatal error,
     * keeping the original exception attached as the cause.
     * Does nothing when no error handler is registered.
     *
     * @param pe The failure raised by the underlying parser.
     * @param systemId The system identifier to put in the report.
     * @exception SAXException If the error handler throws.
     */
    private void reportParserFailure (ParserException pe, String systemId)
        throws SAXException
    {
        if (null != mErrorHandler)
            mErrorHandler.fatalError (
                new SAXParseException (pe.getMessage (), "", systemId, 0, 0, pe));
    }

    /**
     * Process nodes recursively on the ContentHandler.
     *
     * <ul>
     * <li>Remark nodes are delivered through <code>ignorableWhitespace</code>.</li>
     * <li>Text nodes are delivered through <code>characters</code>.</li>
     * <li>Tag nodes produce <code>startElement</code>, then their children
     * recursively, then <code>endElement</code>.</li>
     * </ul>
     *
     * <p>Every <code>startElement</code> is matched by an
     * <code>endElement</code>: when the tag has no end tag the start tag's
     * own name is used for the closing event, so that handlers always see
     * balanced events.</p>
     *
     * <p>Does rudimentary namespace processing according to the state of
     * {@link #mNameSpaces} and {@link #mNameSpacePrefixes}.</p>
     *
     * @param node The htmlparser node to traverse.
     * @exception ParserException If a parse error occurs.
     * @exception SAXException If a SAX error occurs.
     */
    protected void doSAX (Node node)
        throws
            ParserException,
            SAXException
    {
        if (node instanceof Remark)
        {
            String text = mParser.getLexer ().getPage ().getText (
                node.getStartPosition (), node.getEndPosition ());
            mContentHandler.ignorableWhitespace (text.toCharArray (), 0, text.length ());
        }
        else if (node instanceof Text)
        {
            String text = mParser.getLexer ().getPage ().getText (
                node.getStartPosition (), node.getEndPosition ());
            mContentHandler.characters (text.toCharArray (), 0, text.length ());
        }
        else if (node instanceof Tag)
        {
            Tag tag = (Tag)node;

            resolveNameParts (tag.getTagName ());
            mContentHandler.startElement (
                mParts[0], // uri
                mParts[1], // local
                mParts[2], // raw
                new Attributes (tag, mSupport, mParts));

            NodeList children = tag.getChildren ();
            if (null != children)
                for (int i = 0; i < children.size (); i++)
                    doSAX (children.elementAt (i));

            // mParts is shared scratch space (it was also handed to the
            // Attributes above and reused by the children), so the name
            // has to be resolved again for the closing event.
            Tag end = tag.getEndTag ();
            resolveNameParts ((null != end) ? end.getTagName () : tag.getTagName ());
            mContentHandler.endElement (
                mParts[0], // uri
                mParts[1], // local
                mParts[2]); // raw
        }
    }

    /**
     * Fill {@link #mParts} with the namespace URI [0], local name [1] and
     * raw name [2] for a tag name, honouring {@link #mNameSpaces} and
     * {@link #mNameSpacePrefixes}.
     *
     * @param tagName The tag name as reported by the htmlparser tag.
     */
    private void resolveNameParts (String tagName)
    {
        if (mNameSpaces)
            mSupport.processName (tagName, mParts, false);
        else
        {
            mParts[0] = "";
            mParts[1] = "";
        }
        // The raw name is suppressed only when namespaces are on and
        // prefix reporting is off.
        if (mNameSpaces && !mNameSpacePrefixes)
            mParts[2] = "";
        else
            mParts[2] = tagName;
    }
}
上一页 12
💿 文件大小 6824 K
👤 上传用户 gankai1983
📂 所属分类 Jsp/Servlet
🏷️ 相关标签

#程序 #分 #页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -