htmlcollector.java

来自「HTML解释器JAVA源码」· Java 代码 · 共 156 行

JAVA

156 行

/* * HtmlCollector.java -- structures an HTML document tree.   * Copyright (C) 1999 Quiotix Corporation.   * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License, version 2, as  * published by the Free Software Foundation.   * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the * GNU General Public License (http://www.gnu.org/copyleft/gpl.txt) * for more details. */package com.quiotix.html.parser;import java.util.*;import java.io.*;/** An HtmlVisitor which modifies the structure of the document so that * begin tags are matched properly with end tags and placed in TagBlock * elements.  Typically, an HtmlDocument is created by the parser, which  * simply returns a flat list of elements.  The HtmlCollector takes this * flat list and gives it the structure that is implied by the HTML content. 
* * @author Brian Goetz, Quiotix */public class HtmlCollector extends HtmlVisitor {  protected MyVector tagStack = new MyVector();  protected MyVector elements;  protected boolean collected;  protected static Hashtable dontMatch = new Hashtable();  protected static String[] dontMatchStrings    = { "AREA", "BASE", "BASEFONT", "BR", "COL", "HR", "IMG", "INPUT",         "ISINDEX", "LINK", "META", "P", "PARAM" };  static {    Integer dummy = new Integer(0);    for (int i=0; i < dontMatchStrings.length; i++)       dontMatch.put(dontMatchStrings[i], dummy);  };  private static class TagStackEntry {    String tagName;    int index;  };  private static class MyVector extends Vector {    MyVector()      { super();  }    MyVector(int n) { super(n); }    public void popN(int n) { elementCount -= n; }  };  protected int pushNode(HtmlDocument.HtmlElement e) {    elements.addElement(e);    return elements.size()-1;  };  public void visit(HtmlDocument.Comment c)     { pushNode(c); };  public void visit(HtmlDocument.Text t)        { pushNode(t); };  public void visit(HtmlDocument.Newline n)     { pushNode(n); };  public void visit(HtmlDocument.Tag t)         {     TagStackEntry ts = new TagStackEntry();    int index;    // Push the tag onto the element stack, and push an entry on the tag    // stack if it's a tag we care about matching    index = pushNode(t);    if (!t.emptyTag         && !dontMatch.containsKey(t.tagName.toUpperCase())) {      ts.tagName = t.tagName;      ts.index = index;      tagStack.addElement(ts);    };  };  public void visit(HtmlDocument.EndTag t)      {     int i;    for (i=tagStack.size()-1; i >= 0; i--) {      TagStackEntry ts = (TagStackEntry) tagStack.elementAt(i);      if (t.tagName.equalsIgnoreCase(ts.tagName)) {        HtmlDocument.TagBlock block;        HtmlDocument.ElementSequence blockElements;        HtmlDocument.Tag tag;              // Create a new ElementSequence and copy the elements to it        blockElements =           new 
HtmlDocument.ElementSequence(elements.size() - ts.index - 1);        for (int j=ts.index+1; j<elements.size(); j++)           blockElements.addElement((HtmlDocument.HtmlElement)                                    elements.elementAt(j));        tag = (HtmlDocument.Tag) elements.elementAt(ts.index);        block = new HtmlDocument.TagBlock(tag.tagName,                                           tag.attributeList, blockElements);        // Pop the elements off the stack, push the new block        elements.popN(elements.size() - ts.index);        elements.addElement(block);        // Pop the matched tag and intervening unmatched tags         tagStack.popN(tagStack.size()-i);                collected = true;        break;      };    };    // If we didn't find a match, just push the end tag    if (i < 0)       pushNode(t);  };  public void visit(HtmlDocument.TagBlock bl) {    HtmlCollector c = new HtmlCollector();    c.start();    c.visit(bl.body);    c.finish();    pushNode(bl);  }  public void visit(HtmlDocument.ElementSequence s) {    elements = new MyVector(s.elements.size());    collected = false;    for (Enumeration e = s.elements.elements();          e.hasMoreElements(); )      ((HtmlDocument.HtmlElement)e.nextElement()).accept(this);    if (collected)       s.elements = elements;  }  public void start() {}  public void finish() {}  public static void main (String[] args) throws Exception {    InputStream r = new FileInputStream(args[0]);    HtmlDocument document;        try {       document = new com.quiotix.html.parser.HtmlParser(r).HtmlDocument();      document.accept(new HtmlScrubber());      document.accept(new HtmlCollector());      document.accept(new HtmlDumper(System.out));    }    finally {      r.close();    };  };}

htmlcollector.java - 源码说明

本页面展示了「HTML解释器JAVA源码」中的 htmlcollector.java 源码文件，采用 Java 编程语言编写，共 156 行代码。您可以在线阅读完整代码内容，也可以返回资源详情页下载完整源码包进行本地学习和开发。

虫虫下载站收录了大量与HTML相关的技术资源，包括源代码、技术文档、电路图等，是电子工程师和嵌入式开发者的专业学习平台。

⌨️ 快捷键说明

复制代码：Ctrl + C

搜索代码：Ctrl + F

全屏模式：F11

增大字号：Ctrl + =

减小字号：Ctrl + -

显示快捷键：?