⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 htmlparser.java

📁 反ajax原代码
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
// Decompiled by Jad v1.5.7g. Copyright 2000 Pavel Kouznetsov.
// Jad home page: http://www.geocities.com/SiliconValley/Bridge/8617/jad.html
// Decompiler options: packimports(3) fieldsfirst ansi 
// Source File Name:   HTMLParser.java

package com.laoer.bbscs.lucene.html;

import com.laoer.bbscs.sys.Constant;
import java.io.*;
import java.util.*;

// Referenced classes of package com.laoer.bbscs.lucene.html:
//            ParserThread, ParseException, SimpleCharStream, HTMLParserTokenManager, 
//            Token, HTMLParserConstants, Entities, Tags

public class HTMLParser
    implements HTMLParserConstants
{
    /**
     * Plain linked-list node used by the parser's lookahead bookkeeping
     * (see the {@code jj_2_rtns} array of the enclosing class).
     *
     * NOTE(review): field meanings are not established by the visible code;
     * this looks like the standard JavaCC-generated helper -- confirm against
     * the generator's documentation before relying on them.
     */
    static final class JJCalls
    {
        int gen;
        int arg;
        Token first;
        JJCalls next;
    }

    private static final class LookaheadSuccess extends Error
    {

        private LookaheadSuccess()
        {
        }

        LookaheadSuccess(_cls0 x0)
        {
            this();
        }
    }

    /**
     * {@link PipedInputStream} that can report whether at least a full
     * default pipe buffer of bytes is waiting to be read.
     */
    private class MyPipedInputStream extends PipedInputStream
    {

        /** Creates an unconnected stream. */
        public MyPipedInputStream()
        {
            super();
        }

        /**
         * Creates a stream connected to {@code src}.
         *
         * @param src the output end of the pipe
         * @throws IOException if the connection cannot be made
         */
        public MyPipedInputStream(PipedOutputStream src)
            throws IOException
        {
            super(src);
        }

        /**
         * @return {@code true} once the number of readable bytes reaches
         *         {@code PIPE_SIZE} (1024)
         * @throws IOException if {@link #available()} fails
         */
        public boolean full()
            throws IOException
        {
            int buffered = available();
            return buffered >= PIPE_SIZE;
        }
    }


    // Maximum number of characters kept in the summary: addToSummary() stops
    // appending once this is reached and getSummary() truncates to it.
    public static int SUMMARY_LENGTH = 200;
    // Text collected by addText()/addSpace() while inTitle is set.
    StringBuffer title;
    // Text collected by addToSummary() while not inside the title.
    StringBuffer summary;
    // name/http-equiv -> content pairs stored by addMetaTag().
    Properties metaTags;
    // Pending halves of a meta tag; set in Tag(), cleared by addMetaTag().
    String currentMetaTag;
    String currentMetaContent;
    // Running total of characters written to pipeOut.
    int length;
    // Set (under the object monitor) by addText(); polled by getTitle() and getMetaTags().
    boolean titleComplete;
    // Flags assigned in Tag() from the name of the most recent tag.
    boolean inTitle;
    boolean inMetaTag;
    boolean inStyle;
    // Set by HTMLDocument(): true after markup constructs, false after text/space tokens.
    boolean afterTag;
    // True after addSpace() emitted a separator; reset by addText().
    boolean afterSpace;
    // Written by addSpace() instead of " " when afterTag is set.
    // NOTE(review): initialised outside the visible code -- presumably the line separator; confirm.
    String eol;
    // Ends of the pipe created lazily by getReader().
    Reader pipeIn;
    Writer pipeOut;
    private MyPipedInputStream pipeInStream;
    private PipedOutputStream pipeOutStream;
    // Parser machinery below. NOTE(review): these look like standard
    // JavaCC-generated state; most of their uses lie outside the visible code.
    public HTMLParserTokenManager token_source;
    SimpleCharStream jj_input_stream;
    public Token token;
    public Token jj_nt;
    // Cached kind of the next token; -1 means "not computed yet" (callers then invoke jj_ntk()).
    private int jj_ntk;
    private Token jj_scanpos;
    private Token jj_lastpos;
    private int jj_la;
    public boolean lookingAhead;
    private boolean jj_semLA;
    // Stored into jj_la1[i] at each choice point where no alternative matched.
    private int jj_gen;
    private final int jj_la1[];
    private static int jj_la1_0[];
    private final JJCalls jj_2_rtns[];
    private boolean jj_rescan;
    private int jj_gc;
    private final LookaheadSuccess jj_ls;
    private Vector jj_expentries;
    private int jj_expentry[];
    private int jj_kind;
    private int jj_lasttokens[];
    private int jj_endpos;

    /**
     * Creates a parser reading from {@code file} by opening a
     * {@link FileInputStream} on it and delegating to the
     * {@link InputStream} constructor. The stream is not closed here.
     *
     * @param file the HTML file to parse
     * @throws FileNotFoundException if the file cannot be opened for reading
     */
    public HTMLParser(File file)
        throws FileNotFoundException
    {
        this((InputStream) new FileInputStream(file));
    }

    public String getTitle()
        throws IOException, InterruptedException
    {
        if(pipeIn == null)
            getReader();
_L2:
label0:
        {
            synchronized(this)
            {
                if(!titleComplete && !pipeInStream.full())
                    break label0;
            }
            break; /* Loop/switch isn't completed */
        }
        wait(10L);
        htmlparser;
        JVM INSTR monitorexit ;
        if(true) goto _L2; else goto _L1
_L1:
        return title.toString().trim();
    }

    public Properties getMetaTags()
        throws IOException, InterruptedException
    {
        if(pipeIn == null)
            getReader();
_L2:
label0:
        {
            synchronized(this)
            {
                if(!titleComplete && !pipeInStream.full())
                    break label0;
            }
            break; /* Loop/switch isn't completed */
        }
        wait(10L);
        htmlparser;
        JVM INSTR monitorexit ;
        if(true) goto _L2; else goto _L1
_L1:
        return metaTags;
    }

    public String getSummary()
        throws IOException, InterruptedException
    {
        if(pipeIn == null)
            getReader();
_L2:
label0:
        {
            synchronized(this)
            {
                if(summary.length() < SUMMARY_LENGTH && !pipeInStream.full())
                    break label0;
            }
            break; /* Loop/switch isn't completed */
        }
        wait(10L);
        htmlparser;
        JVM INSTR monitorexit ;
        if(true) goto _L2; else goto _L1
_L1:
        if(summary.length() > SUMMARY_LENGTH)
            summary.setLength(SUMMARY_LENGTH);
        String sum = summary.toString().trim();
        String tit = getTitle();
        if(sum.startsWith(tit) || sum.equals(""))
            return tit;
        else
            return sum;
    }

    /**
     * Returns a reader over the text extracted from the document. On the
     * first call this creates the pipe and starts a {@code ParserThread} for
     * this parser; later calls return the same reader.
     *
     * Both ends of the pipe use UTF-16BE rather than the platform default
     * charset, so every Java {@code char} survives the round trip through
     * the byte pipe even when the default charset cannot encode it. Each
     * character therefore occupies two bytes of the pipe buffer.
     *
     * @return the reader end of the pipe
     * @throws IOException if the pipe cannot be connected
     */
    public Reader getReader()
        throws IOException
    {
        if(pipeIn == null)
        {
            pipeInStream = new MyPipedInputStream();
            pipeOutStream = new PipedOutputStream(pipeInStream);
            pipeIn = new InputStreamReader(pipeInStream, "UTF-16BE");
            pipeOut = new OutputStreamWriter(pipeOutStream, "UTF-16BE");
            Thread thread = new ParserThread(this);
            thread.start();
        }
        return pipeIn;
    }

    /**
     * Appends {@code text} to the summary buffer unless the buffer already
     * holds {@code SUMMARY_LENGTH} characters, and wakes all threads waiting
     * on this parser when the append brings it to that length.
     *
     * @param text the text to append
     */
    void addToSummary(String text)
    {
        if(summary.length() >= SUMMARY_LENGTH)
            return;
        summary.append(text);
        if(summary.length() < SUMMARY_LENGTH)
            return;
        synchronized(this)
        {
            notifyAll();
        }
    }

    /**
     * Records a run of document text: it goes to the title while
     * {@code inTitle} is set and to the summary otherwise, and is always
     * written to the output pipe. Text inside a style element is dropped.
     *
     * The first non-title text seen after a non-empty title marks the title
     * as complete and wakes waiting threads. The original test was
     * {@code !title.equals("")}; {@link StringBuffer} does not override
     * {@code equals}, so that comparison with a {@code String} was always
     * false and the title check never took effect. It is now a length test.
     *
     * NOTE(review): for a document without a title, {@code titleComplete} is
     * no longer set here; presumably the parser thread sets it when parsing
     * ends -- confirm in {@code ParserThread}.
     *
     * @param text the text to record
     * @throws IOException if writing to the pipe fails
     */
    void addText(String text)
        throws IOException
    {
        if(inStyle)
            return;
        if(inTitle)
        {
            title.append(text);
        } else
        {
            addToSummary(text);
            if(!titleComplete && title.length() > 0)
                synchronized(this)
                {
                    titleComplete = true;
                    notifyAll();
                }
        }
        length += text.length();
        pipeOut.write(text);
        afterSpace = false;
    }

    /**
     * Stores the pending meta tag name and content as one entry in
     * {@code metaTags}, then clears both pending values.
     *
     * @throws IOException never thrown by this body; kept for the declared signature
     */
    void addMetaTag()
        throws IOException
    {
        final String name = currentMetaTag;
        final String content = currentMetaContent;
        metaTags.setProperty(name, content);
        currentMetaContent = null;
        currentMetaTag = null;
    }

    /**
     * Emits one separator unless one was emitted immediately before. A
     * single space goes to the title or the summary; the pipe receives
     * {@code eol} if the previous construct was a tag and a space otherwise.
     *
     * @throws IOException if writing to the pipe fails
     */
    void addSpace()
        throws IOException
    {
        if(afterSpace)
            return;
        if(!inTitle)
            addToSummary(" ");
        else
            title.append(" ");
        String separator;
        if(afterTag)
            separator = eol;
        else
            separator = " ";
        length += separator.length();
        pipeOut.write(separator);
        afterSpace = true;
    }

    /**
     * Top-level production: consumes document constructs until the next
     * token kind is not one that starts a construct, then requires token
     * kind 0.
     *
     * Dispatch by next token kind: 2 -> {@code Tag()}, 3 -> {@code Decl()},
     * 4/5 -> {@code CommentTag()}, 1 -> {@code ScriptTag()}; kinds 6 and 12
     * are added as text verbatim, kind 9 is added after
     * {@code Entities.decode}, and kind 10 becomes a separator via
     * {@code addSpace()}. {@code afterTag} is set after the four markup
     * productions and cleared after text or space.
     *
     * The decompiled body declared {@code Token t} once per case inside a
     * single switch scope, which does not compile; it is now declared once.
     * The unused result of {@code Decl()} is no longer stored.
     *
     * @throws ParseException if the token stream does not match the grammar
     * @throws IOException if writing extracted text to the pipe fails
     */
    public final void HTMLDocument()
        throws ParseException, IOException
    {
        Token t;
documentLoop:
        while(true)
        {
            // First look: does another construct follow at all?
            switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
            {
            case 1:
            case 2:
            case 3:
            case 4:
            case 5:
            case 6:
            case 9:
            case 10:
            case 12:
                break;

            default:
                jj_la1[0] = jj_gen;
                break documentLoop;
            }
            // Second look: dispatch on the kind of construct.
            switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
            {
            case 2:
                Tag();
                afterTag = true;
                break;

            case 3:
                Decl();
                afterTag = true;
                break;

            case 4:
            case 5:
                CommentTag();
                afterTag = true;
                break;

            case 1:
                ScriptTag();
                afterTag = true;
                break;

            case 6:
                t = jj_consume_token(6);
                addText(t.image);
                afterTag = false;
                break;

            case 9:
                t = jj_consume_token(9);
                addText(Entities.decode(t.image));
                afterTag = false;
                break;

            case 12:
                t = jj_consume_token(12);
                addText(t.image);
                afterTag = false;
                break;

            case 10:
                jj_consume_token(10);
                addSpace();
                afterTag = false;
                break;

            default:
                jj_la1[1] = jj_gen;
                jj_consume_token(-1);
                throw new ParseException();
            }
        }
        jj_consume_token(0);
    }

    /**
     * Parses one tag: the tag-name token (kind 2), any number of attributes
     * (name token kind 15, optionally followed by kind 16 and a value), and
     * the closing token (kind 17).
     *
     * Side effects driven by the tag name and attributes:
     * <ul>
     * <li>a separator is emitted when the lower-cased tag name is in
     *     {@code Tags.WS_ELEMS};</li>
     * <li>{@code inTitle}, {@code inMetaTag} and {@code inStyle} are
     *     reassigned from the tag name;</li>
     * <li>the {@code alt} value of an {@code img} tag is added as text in
     *     square brackets;</li>
     * <li>for a meta tag, the {@code name}/{@code HTTP-EQUIV} value and the
     *     {@code content} value are lower-cased and stored once both are
     *     present.</li>
     * </ul>
     *
     * Lower-casing uses {@link Locale#ENGLISH} so the result does not depend
     * on the JVM default locale; under a Turkish locale, for example, an
     * upper-case {@code I} would not become {@code i}, and the tag name
     * would no longer match an ASCII entry in {@code Tags.WS_ELEMS}.
     *
     * @throws ParseException if the token stream does not match the grammar
     * @throws IOException if writing extracted text to the pipe fails
     */
    public final void Tag()
        throws ParseException, IOException
    {
        Token t1 = jj_consume_token(2);
        String tagName = t1.image.toLowerCase(Locale.ENGLISH);
        if(Tags.WS_ELEMS.contains(tagName))
            addSpace();
        inTitle = tagName.equalsIgnoreCase("<title");
        inMetaTag = tagName.equalsIgnoreCase("<META");
        inStyle = tagName.equalsIgnoreCase("<STYLE");
        boolean inImg = tagName.equalsIgnoreCase("<img");
        while(true)
        {
            // No further attribute name: the attribute list is finished.
            if((jj_ntk != -1 ? jj_ntk : jj_ntk()) != 15)
            {
                jj_la1[2] = jj_gen;
                break;
            }
            t1 = jj_consume_token(15);
            // Attribute without "=": nothing more to read for it.
            if((jj_ntk != -1 ? jj_ntk : jj_ntk()) != 16)
            {
                jj_la1[4] = jj_gen;
                continue;
            }
            jj_consume_token(16);
            switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
            {
            case 18:
            case 19:
            case 20:
                // ArgValue() returns null for an empty quoted value.
                Token t2 = ArgValue();
                if(inImg && t1.image.equalsIgnoreCase("alt") && t2 != null)
                    addText("[" + t2.image + "]");
                if(inMetaTag && (t1.image.equalsIgnoreCase("name") || t1.image.equalsIgnoreCase("HTTP-EQUIV")) && t2 != null)
                {
                    currentMetaTag = t2.image.toLowerCase(Locale.ENGLISH);
                    if(currentMetaTag != null && currentMetaContent != null)
                        addMetaTag();
                }
                if(inMetaTag && t1.image.equalsIgnoreCase("content") && t2 != null)
                {
                    currentMetaContent = t2.image.toLowerCase(Locale.ENGLISH);
                    if(currentMetaTag != null && currentMetaContent != null)
                        addMetaTag();
                }
                break;

            default:
                jj_la1[3] = jj_gen;
                break;
            }
        }
        jj_consume_token(17);
    }

    /**
     * Parses one attribute value and returns the token that carries it.
     *
     * Alternatives are tried in this order:
     * <ol>
     * <li>a single token of kind 18, which is returned;</li>
     * <li>if {@code jj_2_1(2)} succeeds: kinds 19 then 23 with nothing
     *     between them, returning {@code null};</li>
     * <li>kinds 19, 22, 23, returning the kind-22 token;</li>
     * <li>if {@code jj_2_2(2)} succeeds: kinds 20 then 25 with nothing
     *     between them, returning {@code null};</li>
     * <li>kinds 20, 24, 25, returning the kind-24 token.</li>
     * </ol>
     *
     * @return the value token, or {@code null} for the two empty forms
     * @throws ParseException if none of the alternatives matches
     */
    public final Token ArgValue()
        throws ParseException
    {
        if((jj_ntk != -1 ? jj_ntk : jj_ntk()) == 18)
            return jj_consume_token(18);
        jj_la1[5] = jj_gen;

        if(jj_2_1(2))
        {
            jj_consume_token(19);
            jj_consume_token(23);
            return null;
        }
        if((jj_ntk != -1 ? jj_ntk : jj_ntk()) == 19)
        {
            jj_consume_token(19);
            Token quoted = jj_consume_token(22);
            jj_consume_token(23);
            return quoted;
        }
        jj_la1[6] = jj_gen;

        if(jj_2_2(2))
        {
            jj_consume_token(20);
            jj_consume_token(25);
            return null;
        }
        if((jj_ntk != -1 ? jj_ntk : jj_ntk()) == 20)
        {
            jj_consume_token(20);
            Token quoted = jj_consume_token(24);
            jj_consume_token(25);
            return quoted;
        }
        jj_la1[7] = jj_gen;

        jj_consume_token(-1);
        throw new ParseException();
    }

    /**
     * Parses one declaration: the opening token (kind 3), then any mix of
     * kind-15 tokens, kind-16 tokens and attribute values, then the closing
     * token (kind 17).
     *
     * @return the opening kind-3 token
     * @throws ParseException if the token stream does not match the grammar
     */
    public final Token Decl()
        throws ParseException
    {
        Token opening = jj_consume_token(3);
        boolean moreParts = true;
        while(moreParts)
        {
            // First look: does another part of the declaration follow?
            switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
            {
            case 15:
            case 16:
            case 18:
            case 19:
            case 20:
                // Second look: consume that part.
                switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
                {
                case 15:
                    jj_consume_token(15);
                    break;

                case 16:
                    jj_consume_token(16);
                    break;

                case 18:
                case 19:
                case 20:
                    ArgValue();
                    break;

                default:
                    jj_la1[9] = jj_gen;
                    jj_consume_token(-1);
                    throw new ParseException();
                }
                break;

            default:
                jj_la1[8] = jj_gen;
                moreParts = false;
                break;
            }
        }
        jj_consume_token(17);
        return opening;
    }

    public final void CommentTag()
        throws ParseException
    {
        switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
        {
        case 4: // '\004'
            jj_consume_token(4);
label0:
            do
                switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
                {
                default:
                    jj_la1[10] = jj_gen;
                    break label0;

                case 26: // '\032'
                    jj_consume_token(26);
                    break;
                }
            while(true);
            jj_consume_token(27);
            break;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -