📄 htmlparser.java
字号:
// Decompiled by Jad v1.5.7g. Copyright 2000 Pavel Kouznetsov.
// Jad home page: http://www.geocities.com/SiliconValley/Bridge/8617/jad.html
// Decompiler options: packimports(3) fieldsfirst ansi
// Source File Name: HTMLParser.java
package com.laoer.bbscs.lucene.html;
import com.laoer.bbscs.sys.Constant;
import java.io.*;
import java.util.*;
// Referenced classes of package com.laoer.bbscs.lucene.html:
// ParserThread, ParseException, SimpleCharStream, HTMLParserTokenManager,
// Token, HTMLParserConstants, Entities, Tags
public class HTMLParser
implements HTMLParserConstants
{
/**
 * Singly linked record type; the parser keeps an array of these in
 * {@code jj_2_rtns}.
 * NOTE(review): the code that reads and writes these fields is outside this
 * part of the file; they look like the usual JavaCC lookahead-call records
 * -- confirm before relying on that.
 */
static final class JJCalls
{
    // Link to the next record in the chain.
    JJCalls next;
    // Token associated with this record.
    Token first;
    int gen;
    int arg;

    // Nothing to initialise: every field keeps its default value.
    JJCalls()
    {
    }
}
// Error subclass with no state of its own; the parser holds a single instance
// in the jj_ls field.
// NOTE(review): presumably thrown to abort a speculative lookahead once it has
// succeeded (standard JavaCC pattern) -- the throwing/catching code is not
// visible in this part of the file.
private static final class LookaheadSuccess extends Error
{
private LookaheadSuccess()
{
}
// NOTE(review): `_cls0` is a decompiler (Jad) placeholder for a synthetic
// accessor-constructor parameter type; it is not declared anywhere visible
// and this constructor will not compile as written. Left untouched because
// its callers are outside this view.
LookaheadSuccess(_cls0 x0)
{
this();
}
}
/**
 * PipedInputStream that can report whether its buffered data has reached the
 * default pipe capacity.
 */
private class MyPipedInputStream extends PipedInputStream
{
    /** Creates an unconnected stream. */
    public MyPipedInputStream()
    {
        super();
    }

    /**
     * Creates a stream connected to {@code src}.
     *
     * @throws IOException if the connection cannot be made
     */
    public MyPipedInputStream(PipedOutputStream src)
    throws IOException
    {
        super(src);
    }

    /**
     * @return true when at least PIPE_SIZE (1024) bytes are waiting to be read
     * @throws IOException if {@code available()} fails
     */
    public boolean full()
    throws IOException
    {
        int buffered = available();
        return buffered >= PIPE_SIZE;
    }
}
// Maximum number of characters kept in the summary; getSummary() truncates to
// this length and addToSummary() stops appending once it is reached.
public static int SUMMARY_LENGTH = 200;
// Text collected while inTitle is set (see addText/addSpace).
StringBuffer title;
// Text collected outside the title (see addToSummary).
StringBuffer summary;
// name -> content pairs stored by addMetaTag().
Properties metaTags;
// Pending meta name/content; set in Tag(), cleared by addMetaTag().
String currentMetaTag;
String currentMetaContent;
// Running count of characters written to pipeOut.
int length;
// Set (under the parser's monitor) by addText(); polled by getTitle() and
// getMetaTags().
boolean titleComplete;
// Flags derived from the most recent tag name in Tag().
boolean inTitle;
boolean inMetaTag;
boolean inStyle;
// True after a tag/declaration/comment/script, false after text or space
// (see HTMLDocument); addSpace() uses it to choose eol over " ".
boolean afterTag;
// True once addSpace() has emitted a separator; reset by addText().
boolean afterSpace;
// Separator written by addSpace() when afterTag is set.
// NOTE(review): initialisation is not visible in this part of the file.
String eol;
// Reader/Writer ends of the pipe created lazily in getReader().
Reader pipeIn;
Writer pipeOut;
// Underlying byte streams of the pipe, also created in getReader().
private MyPipedInputStream pipeInStream;
private PipedOutputStream pipeOutStream;
// Generated-parser state below. NOTE(review): most of the code that uses
// these fields is outside this view; only jj_ntk, jj_gen and jj_la1 are
// touched by the productions visible here.
public HTMLParserTokenManager token_source;
SimpleCharStream jj_input_stream;
public Token token;
public Token jj_nt;
// Kind of the next token, or -1 when it still has to be fetched via jj_ntk().
private int jj_ntk;
private Token jj_scanpos;
private Token jj_lastpos;
private int jj_la;
public boolean lookingAhead;
private boolean jj_semLA;
private int jj_gen;
// Each production stores jj_gen into a fixed slot of this array when it
// takes a default (no-match) branch.
private final int jj_la1[];
private static int jj_la1_0[];
private final JJCalls jj_2_rtns[];
private boolean jj_rescan;
private int jj_gc;
private final LookaheadSuccess jj_ls;
private Vector jj_expentries;
private int jj_expentry[];
private int jj_kind;
private int jj_lasttokens[];
private int jj_endpos;
/**
 * Creates a parser that reads from the given file by delegating to the
 * InputStream constructor.
 *
 * @param file the file to open
 * @throws FileNotFoundException if the file cannot be opened for reading
 */
public HTMLParser(File file)
throws FileNotFoundException
{
    // The cast pins the call to the InputStream overload.
    this((InputStream) new FileInputStream(file));
}
public String getTitle()
throws IOException, InterruptedException
{
if(pipeIn == null)
getReader();
_L2:
label0:
{
synchronized(this)
{
if(!titleComplete && !pipeInStream.full())
break label0;
}
break; /* Loop/switch isn't completed */
}
wait(10L);
htmlparser;
JVM INSTR monitorexit ;
if(true) goto _L2; else goto _L1
_L1:
return title.toString().trim();
}
public Properties getMetaTags()
throws IOException, InterruptedException
{
if(pipeIn == null)
getReader();
_L2:
label0:
{
synchronized(this)
{
if(!titleComplete && !pipeInStream.full())
break label0;
}
break; /* Loop/switch isn't completed */
}
wait(10L);
htmlparser;
JVM INSTR monitorexit ;
if(true) goto _L2; else goto _L1
_L1:
return metaTags;
}
public String getSummary()
throws IOException, InterruptedException
{
if(pipeIn == null)
getReader();
_L2:
label0:
{
synchronized(this)
{
if(summary.length() < SUMMARY_LENGTH && !pipeInStream.full())
break label0;
}
break; /* Loop/switch isn't completed */
}
wait(10L);
htmlparser;
JVM INSTR monitorexit ;
if(true) goto _L2; else goto _L1
_L1:
if(summary.length() > SUMMARY_LENGTH)
summary.setLength(SUMMARY_LENGTH);
String sum = summary.toString().trim();
String tit = getTitle();
if(sum.startsWith(tit) || sum.equals(""))
return tit;
else
return sum;
}
/**
 * Returns a Reader over the text extracted from the document.
 *
 * On the first call this builds the pipe (byte streams plus the Reader/Writer
 * wrappers) and starts a ParserThread for this parser; later calls return the
 * same Reader.
 *
 * Both ends of the pipe use an explicit UTF-8 encoding. With the platform
 * default charset, any character that charset cannot represent would be
 * replaced on the way through the pipe.
 *
 * @return the reading end of the pipe
 * @throws IOException if the pipe cannot be created
 */
public Reader getReader()
throws IOException
{
    if(pipeIn == null)
    {
        pipeInStream = new MyPipedInputStream();
        pipeOutStream = new PipedOutputStream(pipeInStream);
        // Same explicit charset on both ends so the round trip is lossless.
        pipeIn = new InputStreamReader(pipeInStream, "UTF-8");
        pipeOut = new OutputStreamWriter(pipeOutStream, "UTF-8");
        Thread thread = new ParserThread(this);
        thread.start();
    }
    return pipeIn;
}
/**
 * Appends text to the summary buffer unless it already holds SUMMARY_LENGTH
 * characters, and wakes any waiting threads when this append brings the
 * buffer up to that length.
 *
 * @param text the text to append
 */
void addToSummary(String text)
{
    // Summary already long enough: ignore further text.
    if(summary.length() >= SUMMARY_LENGTH)
        return;
    summary.append(text);
    // Still short of the limit: nobody needs waking yet.
    if(summary.length() < SUMMARY_LENGTH)
        return;
    synchronized(this)
    {
        notifyAll();
    }
}
/**
 * Records a piece of document text.
 *
 * Text inside a style element is dropped. Text inside the title is appended
 * to the title buffer; any other text goes to the summary. The first
 * non-title text seen after a non-empty title marks the title as complete and
 * wakes waiting threads. Every accepted piece of text is also written to the
 * pipe and counted in {@code length}.
 *
 * The emptiness test uses {@code title.length()}. The previous
 * {@code title.equals("")} compared a StringBuffer with a String, which is
 * never equal, so the title was flagged complete on the first body text even
 * when no title had been read.
 * NOTE(review): for a document with no title, waiters in getTitle() /
 * getMetaTags() now end only when the pipe fills or when code outside this
 * view sets titleComplete -- confirm ParserThread does so on completion.
 *
 * @param text the text to record
 * @throws IOException if writing to the pipe fails
 */
void addText(String text)
throws IOException
{
    if(inStyle)
        return;
    if(inTitle)
    {
        title.append(text);
    } else
    {
        addToSummary(text);
        if(!titleComplete && title.length() > 0)
            synchronized(this)
            {
                titleComplete = true;
                notifyAll();
            }
    }
    length += text.length();
    pipeOut.write(text);
    afterSpace = false;
}
/**
 * Stores the pending meta name/content pair in {@code metaTags} and then
 * clears both pending values.
 *
 * @throws IOException declared for callers; nothing in this method throws it
 */
void addMetaTag()
throws IOException
{
    final String name = currentMetaTag;
    final String content = currentMetaContent;
    metaTags.setProperty(name, content);
    // Reset so the next meta element starts with no pending values.
    currentMetaContent = null;
    currentMetaTag = null;
}
/**
 * Emits a single separator unless one was just emitted.
 *
 * A blank is added to the title (when inside it) or to the summary. The pipe
 * receives {@code eol} when the previous item was a tag, otherwise a blank.
 *
 * @throws IOException if writing to the pipe fails
 */
void addSpace()
throws IOException
{
    // Collapse runs of whitespace: only the first separator is emitted.
    if(afterSpace)
        return;
    if(inTitle)
        title.append(" ");
    else
        addToSummary(" ");
    String separator;
    if(afterTag)
        separator = eol;
    else
        separator = " ";
    length += separator.length();
    pipeOut.write(separator);
    afterSpace = true;
}
/**
 * Top-level production: consumes document items until the next token is not
 * one that can start an item, then requires token kind 0.
 *
 * Items are dispatched on the kind of the next token:
 * kind 2 -> Tag(), kind 3 -> Decl(), kinds 4/5 -> CommentTag(),
 * kind 1 -> ScriptTag() (each sets afterTag); kinds 6 and 12 -> text added
 * verbatim, kind 9 -> text added after Entities.decode(), kind 10 ->
 * addSpace() (each clears afterTag).
 * NOTE(review): the symbolic names of these kinds live in
 * HTMLParserConstants, which is not visible here; kind 0 is presumably
 * end-of-file.
 *
 * The decompiled form declared {@code Token t} several times inside one
 * switch scope, which does not compile; a single local is used instead and
 * the unused result of Decl() is no longer stored.
 *
 * @throws ParseException if the token stream does not match the grammar
 * @throws IOException if writing extracted text to the pipe fails
 */
public final void HTMLDocument()
throws ParseException, IOException
{
    Token t;
    label0:
    while(true)
    {
        // Continue only while the next token can start a document item.
        switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
        {
        case 1: // '\001'
        case 2: // '\002'
        case 3: // '\003'
        case 4: // '\004'
        case 5: // '\005'
        case 6: // '\006'
        case 9: // '\t'
        case 10: // '\n'
        case 12: // '\f'
            break;
        default:
            jj_la1[0] = jj_gen;
            break label0;
        }
        switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
        {
        case 2: // '\002'
            Tag();
            afterTag = true;
            break;
        case 3: // '\003'
            Decl();
            afterTag = true;
            break;
        case 4: // '\004'
        case 5: // '\005'
            CommentTag();
            afterTag = true;
            break;
        case 1: // '\001'
            ScriptTag();
            afterTag = true;
            break;
        case 6: // '\006'
            t = jj_consume_token(6);
            addText(t.image);
            afterTag = false;
            break;
        case 9: // '\t'
            t = jj_consume_token(9);
            addText(Entities.decode(t.image));
            afterTag = false;
            break;
        case 12: // '\f'
            t = jj_consume_token(12);
            addText(t.image);
            afterTag = false;
            break;
        case 10: // '\n'
            jj_consume_token(10);
            addSpace();
            afterTag = false;
            break;
        default:
            jj_la1[1] = jj_gen;
            jj_consume_token(-1);
            throw new ParseException();
        }
    }
    jj_consume_token(0);
}
/**
 * Parses one tag: the tag-name token (kind 2), any number of attributes
 * (kind 15, optionally followed by kind 16 and a value parsed by ArgValue()),
 * and the closing token (kind 17).
 *
 * Side effects: emits a separator for tag names listed in Tags.WS_ELEMS;
 * updates inTitle / inMetaTag / inStyle from the tag name; adds an image's
 * alt text in square brackets as text; records meta name / http-equiv and
 * content values, storing the pair through addMetaTag() once both are known.
 *
 * @throws ParseException if the token stream does not match the grammar
 * @throws IOException if writing extracted text to the pipe fails
 */
public final void Tag()
throws ParseException, IOException
{
    Token nameTok = jj_consume_token(2);
    String tagName = nameTok.image.toLowerCase();
    if(Tags.WS_ELEMS.contains(tagName))
        addSpace();
    boolean inImg = tagName.equalsIgnoreCase("<img");
    inTitle = tagName.equalsIgnoreCase("<title");
    inMetaTag = tagName.equalsIgnoreCase("<META");
    inStyle = tagName.equalsIgnoreCase("<STYLE");
    while(true)
    {
        // No further attribute name: the attribute list is finished.
        if((jj_ntk != -1 ? jj_ntk : jj_ntk()) != 15)
        {
            jj_la1[2] = jj_gen;
            break;
        }
        Token attr = jj_consume_token(15);
        // Attribute without a value part.
        if((jj_ntk != -1 ? jj_ntk : jj_ntk()) != 16)
        {
            jj_la1[4] = jj_gen;
            continue;
        }
        jj_consume_token(16);
        int valueKind = jj_ntk != -1 ? jj_ntk : jj_ntk();
        if(valueKind != 18 && valueKind != 19 && valueKind != 20)
        {
            jj_la1[3] = jj_gen;
            continue;
        }
        Token value = ArgValue();
        // ArgValue() returns null for some value forms; nothing to record then.
        if(value == null)
            continue;
        if(inImg && attr.image.equalsIgnoreCase("alt"))
            addText("[" + value.image + "]");
        if(!inMetaTag)
            continue;
        if(attr.image.equalsIgnoreCase("name") || attr.image.equalsIgnoreCase("HTTP-EQUIV"))
        {
            currentMetaTag = value.image.toLowerCase();
            if(currentMetaTag != null && currentMetaContent != null)
                addMetaTag();
        }
        if(attr.image.equalsIgnoreCase("content"))
        {
            currentMetaContent = value.image.toLowerCase();
            if(currentMetaTag != null && currentMetaContent != null)
                addMetaTag();
        }
    }
    jj_consume_token(17);
}
/**
 * Parses an attribute value and returns its token.
 *
 * Accepted forms, tried in this order:
 * a single kind-18 token (returned);
 * kind 19 directly followed by kind 23 (returns null);
 * kind 19, kind 22, kind 23 (returns the kind-22 token);
 * kind 20 directly followed by kind 25 (returns null);
 * kind 20, kind 24, kind 25 (returns the kind-24 token).
 * NOTE(review): these look like unquoted, single-quoted and double-quoted
 * values with the null cases being empty quotes -- confirm against
 * HTMLParserConstants. jj_2_1 / jj_2_2 are defined outside this view and are
 * presumably two-token lookahead checks.
 *
 * @return the value token, or null for the two-token forms
 * @throws ParseException if none of the forms matches
 */
public final Token ArgValue()
throws ParseException
{
    if((jj_ntk != -1 ? jj_ntk : jj_ntk()) == 18)
        return jj_consume_token(18);
    jj_la1[5] = jj_gen;
    if(jj_2_1(2))
    {
        jj_consume_token(19);
        jj_consume_token(23);
        return null;
    }
    if((jj_ntk != -1 ? jj_ntk : jj_ntk()) == 19)
    {
        jj_consume_token(19);
        Token inner = jj_consume_token(22);
        jj_consume_token(23);
        return inner;
    }
    jj_la1[6] = jj_gen;
    if(jj_2_2(2))
    {
        jj_consume_token(20);
        jj_consume_token(25);
        return null;
    }
    if((jj_ntk != -1 ? jj_ntk : jj_ntk()) == 20)
    {
        jj_consume_token(20);
        Token inner = jj_consume_token(24);
        jj_consume_token(25);
        return inner;
    }
    // Nothing matched: record the failure and raise a parse error.
    jj_la1[7] = jj_gen;
    jj_consume_token(-1);
    throw new ParseException();
}
/**
 * Parses a declaration: the opening token (kind 3), any mix of kind-15
 * tokens, kind-16 tokens and attribute values (via ArgValue()), and the
 * closing token (kind 17).
 *
 * @return the opening kind-3 token
 * @throws ParseException if the token stream does not match the grammar
 */
public final Token Decl()
throws ParseException
{
    Token opening = jj_consume_token(3);
    items:
    while(true)
    {
        // Continue only while the next token can be part of the declaration body.
        switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
        {
        case 15: // '\017'
        case 16: // '\020'
        case 18: // '\022'
        case 19: // '\023'
        case 20: // '\024'
            break;
        default:
            jj_la1[8] = jj_gen;
            break items;
        }
        switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
        {
        case 15: // '\017'
            jj_consume_token(15);
            break;
        case 16: // '\020'
            jj_consume_token(16);
            break;
        case 18: // '\022'
        case 19: // '\023'
        case 20: // '\024'
            ArgValue();
            break;
        default:
            jj_la1[9] = jj_gen;
            jj_consume_token(-1);
            throw new ParseException();
        }
    }
    jj_consume_token(17);
    return opening;
}
public final void CommentTag()
throws ParseException
{
switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
{
case 4: // '\004'
jj_consume_token(4);
label0:
do
switch(jj_ntk != -1 ? jj_ntk : jj_ntk())
{
default:
jj_la1[10] = jj_gen;
break label0;
case 26: // '\032'
jj_consume_token(26);
break;
}
while(true);
jj_consume_token(27);
break;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -