📄 videoinfoparser.java
字号:
package cn.myvideosite.exe.parser;
import java.util.Date;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.InputTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import cn.myvideosite.commons.Constant;
import cn.myvideosite.data.model.bean.ChannelClass;
import cn.myvideosite.data.model.bean.UserInfo;
import cn.myvideosite.data.model.bean.VideoInformation;
import cn.myvideosite.data.model.services.ChannelService;
import cn.myvideosite.data.model.services.VideoInfoService;
import cn.myvideosite.util.HttpUtil;
import cn.myvideosite.util.MySuperDate;
public class VideoInfoParser {
/**
* Parses video information.
* Target markup of the disabled filter below: {@code <div id="videoInfo_con">}
*/
/* private static final NodeFilter FILTER_DIV_VIDEOINFO=
new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("id","videoInfo_con"));*/
/**
* Matches {@code <dl class="uInfo">} elements: the uploader (user) information section of a video page.
* Note that, despite "DIV" in the constant name, the tag matched is {@code dl}.
* {@code parse} reads the member-space link and the upload time from the matched nodes.
*/
private static final NodeFilter FILTER_DIV_UINFO=
new AndFilter(new TagNameFilter("dl"),new HasAttributeFilter("class","uInfo"));
/**
* Matches {@code <ul class="vInfo">} elements: the video information list.
* {@code parse} reads the channel link and the video introduction from its {@code <li>} items.
*/
private static final NodeFilter FILTER_UL_VINFO=
new AndFilter(new TagNameFilter("ul"),new HasAttributeFilter("class","vInfo"));
/**
* Matches {@code <span>} elements.
* {@code parse} applies it inside the uploader-information block to locate the upload time.
*/
private static final NodeFilter FILTER_SPAN=new TagNameFilter("span");
/**
* Matches {@code <li>} elements.
* {@code parse} applies it to the items of the video information list.
*/
private static final NodeFilter FILTER_LI=new TagNameFilter("li");
/**
* Matches {@code <a>} (link) elements.
* {@code parse} uses it for the member-space link and the channel link.
*/
private static final NodeFilter FILTER_A=new TagNameFilter("a");
/**
* Matches the page {@code <title>} element, e.g. {@code <title>56网视频</title>}.
* {@code parse} compares the title text against known values to decide how (or whether) to parse the page.
*/
private static final NodeFilter FILTER_TITLE=new TagNameFilter("title");
/**
* Matches {@code <div class="mid clearfix">} elements.
* Per the original author's note, this is used when scraping presenter (female host) information;
* the note also mentions the related markup {@code <div class="uinfo clearfix">} and
* {@code <div class="morevinfo">}.
*/
private static final NodeFilter FILTER_DIV_MIDCLEAR=
new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("class","mid clearfix"));
/**
* Matches {@code <p>} elements.
*/
private static final NodeFilter FILTER_P=new TagNameFilter("p");
/**
* Matches {@code <div class="morevinfo">} elements: the video synopsis section.
*/
private static final NodeFilter FILTER_DIV_MOREVINFO=
new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("class","morevinfo"));
/**
* Matches {@code <input>} elements.
* The original note references {@code <input id="input_page_url">}; this filter does not check the id,
* so presumably the caller selects that element itself (TODO confirm against the usage site).
*/
private static final NodeFilter FILTER_INPUT=new TagNameFilter("input");
/**
* Matches {@code <div class="albumimg">} elements: information about the album related to the video.
*/
private static final NodeFilter FILTER_DIV_ALBUMIMG=
new AndFilter (new TagNameFilter("div"),new HasAttributeFilter("class","albumimg"));
/**
* {@code <img>} elements (the filter below is disabled).
*/
//private static final NodeFilter FILTER_IMG=new TagNameFilter("img");
/**
* Matches {@code <p class="more">} elements.
*/
private static final NodeFilter FILTER_P_MORE=
new AndFilter (new TagNameFilter("p"),new HasAttributeFilter("class","more"));
/**
* Matches {@code <h1>} elements.
* {@code parse} takes the video title from the first match on pages that do not use the standard title.
*/
private static final NodeFilter FILTER_H1=new TagNameFilter("h1");
/**
* Matches {@code <h2>} elements.
* {@code parse} inspects the first match to detect director-information and similar pages,
* for which it returns {@code null}.
*/
private static final NodeFilter FILTER_H2=new TagNameFilter("h2");
/**
* {@code <dl class="uinfo">} elements (the filter below is disabled).
*/
/*private static final NodeFilter FILTER_P_CONTENT=
new AndFilter (new TagNameFilter("dl"),new HasAttributeFilter("class","uinfo"));*/
/**
* {@code <div class="vinfo">} elements; the original note also mentions {@code <dd>}
* (the filter below is disabled).
*/
/*private static final NodeFilter FILTER_DIV_VINFO=
new AndFilter (new TagNameFilter("div"),new HasAttributeFilter("class","vinfo"));*/
/**
* {@code <dd>} tags holding director information (the filter below is disabled).
*/
//private static final NodeFilter FILTER_DD=new TagNameFilter("dd");
/**
* Matches {@code <div class="title3">} elements: the video title container.
*/
private static final NodeFilter FILTER_DIV_TITLE3=
new AndFilter (new TagNameFilter("div"),new HasAttributeFilter("class","title3"));
public static VideoInformation parse(String url){
VideoInformation videoinfo = VideoInfoService.findByFlashAddr(url);
if( videoinfo != null) return videoinfo;
String page=HttpUtil.request(url, Constant.CHARSET_GB2312);
if(page !=null){
Parser pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
try {
NodeList titleNL=pageParser.parse(FILTER_TITLE); //判断标题是否为 56网 .........
String str=titleNL.elementAt(0).getChildren().elementAt(0).getText();
//System.out.println(str);
pageParser=Parser.createParser(page, Constant.CHARSET_GB2312); //判断是否为导演信息 或是雷区
NodeList h2NL=pageParser.parse(FILTER_H2);
if(h2NL !=null && h2NL.size()>0){
String str1=h2NL.elementAt(0).getChildren().elementAt(0).getText();
if(str1.equals("导演视频信息") || str1.equals("今天你被雷到了吗?") || str1.equals("用户评论"))
{return null;}
//System.out.println(str1);
}
if(str.equals("56网") || str.equals("56") || str==null || str.equals("56tv - 播放") || str.equals("白领公寓[21].56tv - 播放")) {return null;}
videoinfo = new VideoInformation();
if(str.equals("56网视频") || str.equals("56.com 视频")){
pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
NodeList uinfoNL=pageParser.parse(FILTER_DIV_UINFO);
titleParse( page , videoinfo); //保存 标题
if(uinfoNL !=null && uinfoNL.size()>0){
for(int i=0;i<uinfoNL.size();i++){
Node uinfoNode=uinfoNL.elementAt(i);
pageParser=Parser.createParser(uinfoNode.toHtml(), Constant.CHARSET_GB2312);
NodeList aNL=pageParser.parse(FILTER_A);
if(aNL !=null && aNL.size()>0){
LinkTag link=(LinkTag) aNL.elementAt(0);
System.out.println("会员空间地址:"+link.getLink());
UserInfo userInfo=UserInfoParser.parse(link.getLink());
if(userInfo !=null ){//会员空间地址
videoinfo.setUserId(userInfo.getUserId());
} else{
videoinfo.setUserId(0);
}
}
pageParser=Parser.createParser(uinfoNode.toHtml(), Constant.CHARSET_GB2312);
NodeList spanNL=pageParser.parse(FILTER_SPAN); //上传时间
if(spanNL !=null && spanNL.size()>0){
Node spanNode=spanNL.elementAt(3);
if(spanNode != null){
videoinfo.setUploadTime(new MySuperDate(spanNode.getFirstChild().getText(), false).getDate());
System.out.println("上传时间:"+spanNode.getFirstChild().getText());
}else{
videoinfo.setUploadTime(new Date());
}
}
}
}
pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
NodeList vinfoNL=pageParser.parse(FILTER_UL_VINFO);
if(vinfoNL !=null && vinfoNL.size()>0 ){
for(int i=0;i<vinfoNL.size();i++){
Node vinfoNode=vinfoNL.elementAt(i);
pageParser=Parser.createParser(vinfoNode.toHtml(), Constant.CHARSET_GB2312);
NodeList liNL=pageParser.parse(FILTER_LI);
if(liNL !=null && liNL.size()>0 ){
Node liNode=liNL.elementAt(1); // 频道
if(liNode !=null && !liNode.equals("")){
pageParser=Parser.createParser(liNode.toHtml(), Constant.CHARSET_GB2312); // ?
NodeList aNL=pageParser.parse(FILTER_A);
if(aNL !=null && aNL.size()>0){
LinkTag link=(LinkTag) aNL.elementAt(0);
System.out.println("频道:"+link.getLinkText());
ChannelClass channel = ChannelService.findByChannelName(link.getLinkText());
if(channel != null){
videoinfo.setChannelId(channel.getChannelId());
} else{
videoinfo.setChannelId(0);
}
}
}
Node liNode3=liNL.elementAt(3); //视频简介
if(liNode3 !=null && !liNode3.equals("")){
videoinfo.setIntroduction(liNode3.getChildren().elementAt(1).getText());
System.out.println("视频简介:"+liNode3.getChildren().elementAt(1).getText());
}
}
}
}
}else{
pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
NodeList h1NL=pageParser.parse(FILTER_H1);
if(h1NL != null && h1NL.size()>0){
Node h1Node = h1NL.elementAt(0);
if( h1NL != null )
videoinfo.setVideoTitle(h1Node.getLastChild().toPlainTextString());
System.out.println("标题:"+h1Node.getLastChild().toPlainTextString());
}
pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
NodeList midClearNL=pageParser.parse(FILTER_DIV_MIDCLEAR);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -