⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.java

📁 BBS-CS 天乙社区 v6.0.1(含源码) 天乙社区6.0是一套基于JAVA技术的网络虚拟社区
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
/* Generated By:JavaCC: Do not edit this line. HTMLParser.java */
package com.laoer.bbscs.lucene.html;

import java.io.*;
import java.util.Properties;

public class HTMLParser
    implements HTMLParserConstants {
  // Length threshold for the summary: addToSummary() stops appending once the
  // summary has reached it and getSummary() truncates to it. Also used below
  // to size the title and summary buffers.
  public static int SUMMARY_LENGTH = 200;

  // Text appended by addText()/addSpace() while inTitle is true.
  StringBuffer title = new StringBuffer(SUMMARY_LENGTH);
  // Text appended through addToSummary() while inTitle is false.
  StringBuffer summary = new StringBuffer(SUMMARY_LENGTH * 2);
  // Name/content pairs stored by addMetaTag().
  Properties metaTags = new Properties();
  // Pending meta name and content; Tag() fills them in and addMetaTag()
  // resets both to null after storing the pair.
  String currentMetaTag = null;
  String currentMetaContent = null;
  // Running count of characters handed to pipeOut by addText()/addSpace().
  int length = 0;
  // Set by addText(); polled by getTitle() and getMetaTags().
  boolean titleComplete = false;
  // Recomputed by Tag() from the name of the tag it has just read.
  boolean inTitle = false;
  boolean inMetaTag = false;
  // While true, addText() discards its argument.
  boolean inStyle = false;
  // True after HTMLDocument() handled markup, false after it handled text or
  // a space; addSpace() uses it to choose between eol and " ".
  boolean afterTag = false;
  // Set by addSpace(), cleared by addText(); prevents consecutive spaces.
  boolean afterSpace = false;
  String eol = System.getProperty("line.separator");
  // Reader/writer ends of the pipe; both are created lazily in getReader().
  Reader pipeIn = null;
  Writer pipeOut;
  // Underlying piped byte streams, also created in getReader().
  private MyPipedInputStream pipeInStream = null;
  private PipedOutputStream pipeOutStream = null;

  private class MyPipedInputStream
      extends PipedInputStream {

    public MyPipedInputStream() {
      super();
    }

    public MyPipedInputStream(PipedOutputStream src) throws IOException {
      super(src);
    }

    public boolean full() throws IOException {
      return this.available() >= PipedInputStream.PIPE_SIZE;
    }
  }

  /**
   * Creates a parser for the given file by opening a {@link FileInputStream}
   * on it and delegating to the stream-accepting constructor of this class
   * (declared outside this part of the file).
   *
   * <p>This constructor does not close the stream it opens.
   *
   * @param file the file to open
   * @throws FileNotFoundException if the {@code FileInputStream} cannot be
   *         opened
   */
  public HTMLParser(File file) throws FileNotFoundException {
    this(new FileInputStream(file));
  }

  /**
   * Returns the trimmed title text collected so far.
   *
   * <p>Starts parsing through {@link #getReader()} if it has not started yet,
   * then polls in waits of up to 10 ms until {@code titleComplete} is set or
   * the pipe reports that it is full.
   *
   * @return the contents of the title buffer, trimmed
   * @throws IOException if the pipe cannot be set up or queried
   * @throws InterruptedException if the calling thread is interrupted while
   *         waiting
   */
  public String getTitle() throws IOException, InterruptedException {
    if (pipeIn == null) {
      getReader(); // spawn parsing thread
    }
    boolean ready = false;
    while (!ready) {
      // The monitor is released between polls, exactly one check per pass.
      synchronized (this) {
        ready = titleComplete || pipeInStream.full();
        if (!ready) {
          wait(10);
        }
      }
    }
    String collected = title.toString();
    return collected.trim();
  }

  /**
   * Returns the meta tags collected by the parser.
   *
   * <p>Starts parsing through {@link #getReader()} if necessary and then
   * blocks, polling in waits of up to 10 ms, until {@code titleComplete} is
   * set or the pipe reports that it is full.
   *
   * @return the live {@code metaTags} object (not a copy)
   * @throws IOException if the pipe cannot be set up or queried
   * @throws InterruptedException if the calling thread is interrupted while
   *         waiting
   */
  public Properties getMetaTags() throws IOException,
      InterruptedException {
    if (pipeIn == null) {
      getReader(); // spawn parsing thread
    }
    for (boolean done = false; !done; ) {
      synchronized (this) {
        done = titleComplete || pipeInStream.full();
        if (!done) {
          wait(10);
        }
      }
    }
    return metaTags;
  }

  /**
   * Returns a short summary of the document.
   *
   * <p>Starts parsing through {@link #getReader()} if necessary, then polls
   * in waits of up to 10 ms until the summary buffer holds at least
   * {@code SUMMARY_LENGTH} characters or the pipe reports that it is full.
   * The buffer is truncated to {@code SUMMARY_LENGTH} before it is used.
   *
   * <p>The title is returned instead of the summary when the summary is
   * empty, or when the title is non-empty and the summary begins with it.
   *
   * @return the trimmed summary, or the title in the cases described above
   * @throws IOException if the pipe cannot be set up or queried
   * @throws InterruptedException if the calling thread is interrupted while
   *         waiting
   */
  public String getSummary() throws IOException, InterruptedException {
    if (pipeIn == null) {
      getReader(); // spawn parsing thread
    }
    while (true) {
      synchronized (this) {
        if (summary.length() >= SUMMARY_LENGTH || pipeInStream.full()) {
          break;
        }
        wait(10);
      }
    }
    if (summary.length() > SUMMARY_LENGTH) {
      summary.setLength(SUMMARY_LENGTH);
    }
    String sum = summary.toString().trim();
    String tit = getTitle();
    // Every string starts with "", so an unguarded startsWith() would make
    // untitled documents always return the empty title and drop the summary.
    boolean summaryRepeatsTitle = tit.length() > 0 && sum.startsWith(tit);
    if (sum.equals("") || summaryRepeatsTitle) {
      return tit;
    }
    return sum;
  }

  /**
   * Returns the reader end of the pipe carrying the parsed text, creating the
   * pipe and starting the {@code ParserThread} on the first call.
   *
   * <p>Both ends of the pipe use the same fixed charset. With the platform
   * default charset, any character that charset cannot encode would be
   * replaced while crossing the pipe.
   *
   * @return the reader end of the pipe; the same instance on every call
   * @throws IOException if the piped streams cannot be connected or the
   *         charset is unavailable
   */
  public Reader getReader() throws IOException {
    if (pipeIn == null) {
      // UTF-16BE is a charset every Java platform must support and it can
      // represent every char, so text survives the byte pipe unchanged.
      final String pipeCharset = "UTF-16BE";

      pipeInStream = new MyPipedInputStream();
      pipeOutStream = new PipedOutputStream(pipeInStream);
      pipeIn = new InputStreamReader(pipeInStream, pipeCharset);
      pipeOut = new OutputStreamWriter(pipeOutStream, pipeCharset);

      Thread thread = new ParserThread(this);
      thread.start(); // start parsing
    }

    return pipeIn;
  }

  /**
   * Appends {@code text} to the summary buffer unless the buffer has already
   * reached {@code SUMMARY_LENGTH}. When this append brings the buffer to
   * that length, threads waiting on this object are notified.
   *
   * @param text the text to append
   */
  void addToSummary(String text) {
    if (summary.length() >= SUMMARY_LENGTH) {
      return; // summary already long enough
    }
    summary.append(text);
    if (summary.length() < SUMMARY_LENGTH) {
      return; // still short, nobody to wake up
    }
    synchronized (this) {
      notifyAll();
    }
  }

  /**
   * Records a piece of document text.
   *
   * <p>Text is dropped while {@code inStyle} is set. Otherwise it goes to the
   * title buffer while {@code inTitle} is set and to the summary if not, and
   * in both cases it is also written to the pipe. The first text seen
   * outside the title, once the title buffer is non-empty, marks the title as
   * complete and wakes threads waiting on this object.
   *
   * @param text the text to record
   * @throws IOException if writing to the pipe fails
   */
  void addText(String text) throws IOException {
    if (inStyle) {
      return;
    }
    if (inTitle) {
      title.append(text);
    }
    else {
      addToSummary(text);
      // StringBuffer does not override equals(), so the former
      // title.equals("") was always false and this test always passed.
      // NOTE(review): if no title is ever collected, waiters now depend on
      // the pipe filling up or on titleComplete being set elsewhere when
      // parsing ends - confirm ParserThread does that.
      if (!titleComplete && title.length() > 0) { // finished title
        synchronized (this) {
          titleComplete = true; // tell waiting threads
          notifyAll();
        }
      }
    }

    length += text.length();
    pipeOut.write(text);

    afterSpace = false;
  }

  /**
   * Stores the pending meta name/content pair in {@code metaTags} and then
   * clears both pending values.
   *
   * @throws IOException declared for callers; nothing in this method throws it
   */
  void addMetaTag() throws IOException {
    final String name = currentMetaTag;
    final String content = currentMetaContent;
    metaTags.setProperty(name, content);
    // Reset so the next meta tag starts from a clean slate.
    currentMetaTag = null;
    currentMetaContent = null;
  }

  /**
   * Records one unit of white space, unless the previous thing recorded was
   * already a space.
   *
   * <p>A single blank goes to the title or to the summary, depending on
   * {@code inTitle}. The pipe receives the line separator when the space
   * follows a tag and a single blank otherwise.
   *
   * @throws IOException if writing to the pipe fails
   */
  void addSpace() throws IOException {
    if (afterSpace) {
      return; // collapse runs of white space
    }
    if (!inTitle) {
      addToSummary(" ");
    }
    else {
      title.append(" ");
    }
    String separator;
    if (afterTag) {
      separator = eol;
    }
    else {
      separator = " ";
    }
    length += separator.length();
    pipeOut.write(separator);
    afterSpace = true;
  }

  /**
   * Top-level production (JavaCC-generated): consumes the token stream piece
   * by piece until the next token is none of the kinds handled here, then
   * requires token kind 0.
   *
   * <p>Tags, declarations, comments and scripts are delegated to their own
   * productions and set {@code afterTag}. Words, entities, punctuation and
   * spaces are passed to {@link #addText(String)} / {@link #addSpace()} and
   * clear {@code afterTag}.
   *
   * @throws ParseException if the token stream does not match the grammar
   * @throws IOException if recording text fails
   */
  final public void HTMLDocument() throws ParseException, IOException {
    Token t;
    label_1:while (true) {
      // Loop guard: continue only if the next token can start a document
      // element; otherwise record the generation counter and leave the loop.
      switch ( (jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case ScriptStart:
        case TagName:
        case DeclName:
        case Comment1:
        case Comment2:
        case Word:
        case Entity:
        case Space:
        case Punct:
          ;
          break;
        default:
          jj_la1[0] = jj_gen;
          break label_1;
      }
      // Dispatch on the same look-ahead token.
      switch ( (jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case TagName:
          Tag();
          afterTag = true;
          break;
        case DeclName:
          // The returned token is not used here.
          t = Decl();
          afterTag = true;
          break;
        case Comment1:
        case Comment2:
          CommentTag();
          afterTag = true;
          break;
        case ScriptStart:
          ScriptTag();
          afterTag = true;
          break;
        case Word:
          t = jj_consume_token(Word);
          addText(t.image);
          afterTag = false;
          break;
        case Entity:
          // Entities are decoded before being recorded as text.
          t = jj_consume_token(Entity);
          addText(Entities.decode(t.image));
          afterTag = false;
          break;
        case Punct:
          t = jj_consume_token(Punct);
          addText(t.image);
          afterTag = false;
          break;
        case Space:
          jj_consume_token(Space);
          addSpace();
          afterTag = false;
          break;
        default:
          jj_la1[1] = jj_gen;
          jj_consume_token( -1);
          throw new ParseException();
      }
    }
    // Token kind 0 - presumably end of input, as is usual for JavaCC;
    // confirm against HTMLParserConstants.
    jj_consume_token(0);
  }

  /**
   * Parses one tag: a {@code TagName} token, any number of attributes
   * ({@code ArgName}, optionally followed by {@code ArgEquals} and a value),
   * and a closing {@code TagEnd} token.
   *
   * <p>Side effects visible in this method:
   * <ul>
   * <li>calls {@link #addSpace()} when {@code Tags.WS_ELEMS} contains the
   *     lower-cased tag name;</li>
   * <li>recomputes {@code inTitle}, {@code inMetaTag} and {@code inStyle}
   *     from the tag name on every call;</li>
   * <li>records the {@code alt} attribute of an {@code <img} tag as text in
   *     square brackets;</li>
   * <li>collects the {@code name}/{@code HTTP-EQUIV} and {@code content}
   *     attributes of a {@code <META} tag, both lower-cased, and stores the
   *     pair through {@link #addMetaTag()} once both are present.</li>
   * </ul>
   *
   * @throws ParseException if the token stream does not match the grammar
   * @throws IOException if recording text fails
   */
  final public void Tag() throws ParseException, IOException {
    Token t1, t2;
    boolean inImg = false;
    t1 = jj_consume_token(TagName);
    // NOTE(review): toLowerCase() without a Locale uses the default locale;
    // confirm that is acceptable for the Tags.WS_ELEMS lookup below.
    String tagName = t1.image.toLowerCase();
    if (Tags.WS_ELEMS.contains(tagName)) {
      addSpace();
    }
    inTitle = tagName.equalsIgnoreCase("<title"); // keep track if in <TITLE>
    inMetaTag = tagName.equalsIgnoreCase("<META"); // keep track if in <META>
    inStyle = tagName.equalsIgnoreCase("<STYLE"); // keep track if in <STYLE>
    inImg = tagName.equalsIgnoreCase("<img"); // keep track if in <IMG>

    // One iteration per attribute; leaves the loop when the next token is
    // not an attribute name.
    label_2:while (true) {
      switch ( (jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case ArgName:
          ;
          break;
        default:
          jj_la1[2] = jj_gen;
          break label_2;
      }
      t1 = jj_consume_token(ArgName);
      switch ( (jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case ArgEquals:
          jj_consume_token(ArgEquals);
          switch ( (jj_ntk == -1) ? jj_ntk() : jj_ntk) {
            case ArgValue:
            case ArgQuote1:
            case ArgQuote2:
              // ArgValue() returns null for an empty quoted value, hence the
              // t2 != null checks below.
              t2 = ArgValue();
              if (inImg && t1.image.equalsIgnoreCase("alt") && t2 != null) {
                addText("[" + t2.image + "]");

              }
              if (inMetaTag &&
                  (t1.image.equalsIgnoreCase("name") ||
                   t1.image.equalsIgnoreCase("HTTP-EQUIV")
                   )
                  && t2 != null) {
                currentMetaTag = t2.image.toLowerCase();
                // Store the pair if the content attribute was seen earlier.
                if (currentMetaTag != null && currentMetaContent != null) {
                  addMetaTag();
                }
              }
              if (inMetaTag && t1.image.equalsIgnoreCase("content") && t2 !=
                  null) {
                currentMetaContent = t2.image.toLowerCase();
                // Store the pair if the name attribute was seen earlier.
                if (currentMetaTag != null && currentMetaContent != null) {
                  addMetaTag();
                }
              }
              break;
            default:
              jj_la1[3] = jj_gen;
              ;
          }
          break;
        default:
          jj_la1[4] = jj_gen;
          ;
      }
    }
    jj_consume_token(TagEnd);
  }

  /**
   * Parses an attribute value (JavaCC-generated).
   *
   * <p>Accepts an {@code ArgValue} token, or text enclosed between
   * {@code ArgQuote1}/{@code CloseQuote1} or {@code ArgQuote2}/
   * {@code CloseQuote2}. The {@code jj_2_1(2)} and {@code jj_2_2(2)} checks
   * (defined outside this part of the file) select the branches in which the
   * opening quote is followed directly by the closing quote.
   *
   * <p>The {@code if (true)} wrappers around each {@code return} keep the
   * compiler from rejecting the statements that follow as unreachable.
   *
   * @return the token holding the value text, or {@code null} when the
   *         quotes enclose nothing
   * @throws ParseException if the next tokens do not form a value
   */
  final public Token ArgValue() throws ParseException {
    Token t = null;
    switch ( (jj_ntk == -1) ? jj_ntk() : jj_ntk) {
      case ArgValue:
        t = jj_consume_token(ArgValue);
        {
          if (true) {
            return t;
          }
        }
        break;
      default:
        jj_la1[5] = jj_gen;
        if (jj_2_1(2)) {
          // Opening quote followed directly by closing quote: t stays null.
          jj_consume_token(ArgQuote1);
          jj_consume_token(CloseQuote1);
          {
            if (true) {
              return t;
            }
          }
        }
        else {
          switch ( (jj_ntk == -1) ? jj_ntk() : jj_ntk) {
            case ArgQuote1:
              jj_consume_token(ArgQuote1);
              t = jj_consume_token(Quote1Text);
              jj_consume_token(CloseQuote1);
              {
                if (true) {
                  return t;
                }
              }
              break;
            default:
              jj_la1[6] = jj_gen;
              if (jj_2_2(2)) {
                // Same as above for the second quote style: t stays null.
                jj_consume_token(ArgQuote2);
                jj_consume_token(CloseQuote2);
                {
                  if (true) {
                    return t;
                  }
                }
              }
              else {
                switch ( (jj_ntk == -1) ? jj_ntk() : jj_ntk) {
                  case ArgQuote2:
                    jj_consume_token(ArgQuote2);
                    t = jj_consume_token(Quote2Text);
                    jj_consume_token(CloseQuote2);
                    {
                      if (true) {
                        return t;
                      }
                    }
                    break;
                  default:
                    jj_la1[7] = jj_gen;
                    jj_consume_token( -1);
                    throw new ParseException();
                }
              }
          }
        }
    }
    throw new Error("Missing return statement in function");
  }

  /**
   * Parses a declaration (JavaCC-generated): a {@code DeclName} token, then
   * any mix of {@code ArgName}, {@code ArgEquals} and attribute values, then
   * a {@code TagEnd} token. The contents are consumed and discarded.
   *
   * @return the {@code DeclName} token that started the declaration
   * @throws ParseException if the token stream does not match the grammar
   */
  final public Token Decl() throws ParseException {
    Token t;
    t = jj_consume_token(DeclName);
    label_3:while (true) {
      // Loop guard: continue while the next token can be part of the
      // declaration body.
      switch ( (jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case ArgName:
        case ArgEquals:
        case ArgValue:
        case ArgQuote1:
        case ArgQuote2:
          ;
          break;
        default:
          jj_la1[8] = jj_gen;
          break label_3;
      }
      switch ( (jj_ntk == -1) ? jj_ntk() : jj_ntk) {
        case ArgName:
          jj_consume_token(ArgName);
          break;
        case ArgValue:
        case ArgQuote1:
        case ArgQuote2:
          // The value token is parsed but its result is ignored.
          ArgValue();
          break;
        case ArgEquals:
          jj_consume_token(ArgEquals);
          break;
        default:
          jj_la1[9] = jj_gen;
          jj_consume_token( -1);
          throw new ParseException();
      }
    }
    jj_consume_token(TagEnd);
    {
      // if (true) keeps the trailing throw from being flagged as unreachable.
      if (true) {
        return t;
      }
    }
    throw new Error("Missing return statement in function");
  }

  final public void CommentTag() throws ParseException {
    switch ( (jj_ntk == -1) ? jj_ntk() : jj_ntk) {
      case Comment1:
        jj_consume_token(Comment1);
        label_4:
            while (true) {
          switch ( (jj_ntk == -1) ? jj_ntk() : jj_ntk) {
            case CommentText1:
              ;
              break;
            default:
              jj_la1[10] = jj_gen;
              break label_4;
          }
          jj_consume_token(CommentText1);
        }
        jj_consume_token(CommentEnd1);
        break;
      case Comment2:
        jj_consume_token(Comment2);
        label_5:

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -