// Source file: totalvideoaddr.java
// (Code-viewer page residue: "Font size:" control label; not part of the program.)
package cn.myvideosite.exe.parser;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import cn.myvideosite.commons.Constant;
import cn.myvideosite.data.model.bean.VideoInformation;
import cn.myvideosite.data.model.services.VideoInfoService;
import cn.myvideosite.exception.DownloadException;
import cn.myvideosite.util.FileUtil;
import cn.myvideosite.util.HttpUtil;
import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;
/**
 * Crawls search-result pages of so.56.com for a fixed list of keywords,
 * parses the video linked from every result entry, downloads the entry's
 * thumbnail image and stores the resulting {@link VideoInformation} through
 * {@link VideoInfoService}.
 */
public class TotalVideoAddr {

    /** Matches every {@code <a>} tag. */
    private static final NodeFilter FILTER_DIV_A = new TagNameFilter("a");

    /**
     * Matches {@code <div class="video">} containers, e.g.
     * {@code <div class="video"> <span id=s_Mzc5NTUxMzQ>}.
     */
    private static final NodeFilter FILTER_DIV_VIDEO =
            new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "video"));

    /** Matches every {@code <span>} tag, e.g. {@code <span id=s_Mzc5NTUxMzQ>}. */
    private static final NodeFilter FILTER_DIV_SPAN = new TagNameFilter("span");

    /** Matches every {@code <img>} tag. */
    private static final NodeFilter FILTER_DIV_IMG = new TagNameFilter("img");

    /** Search URL prefix; the URL-encoded keyword is appended to it. */
    private static final String INDEX_URL = "http://so.56.com/index?type=video&key=";

    /** Charset used to URL-encode the search keywords. */
    private static final String KEYWORD_CHARSET = "GBK";

    /**
     * Keywords that are searched, in order.
     * NOTE(review): the first keyword carries a trailing space, which is
     * URL-encoded as '+'. It is kept as-is to leave the request unchanged;
     * confirm whether the space is intentional.
     */
    private static final String[] SEARCH_KEYWORDS = {"封神榜 ", "大话西游", "武林外传", "拳皇"};

    /** Number of result pages requested per search URL. */
    private static final int MAX_PAGES = 99;

    /**
     * Increment of the {@code startat} query parameter between two consecutive
     * pages (presumably the number of results per page; verify against the site).
     */
    private static final int START_OFFSET_STEP = 10;

    /**
     * Runs the crawl for every keyword in {@link #SEARCH_KEYWORDS}.
     *
     * <p>The earlier implementation that discovered the keywords from the
     * channel page has been removed, so {@code url} is currently not used.
     *
     * @param url channel page URL (unused by the current implementation)
     */
    private static void channelParse(String url) {
        try {
            for (int i = 0; i < SEARCH_KEYWORDS.length; i++) {
                pages(INDEX_URL + URLEncoder.encode(SEARCH_KEYWORDS[i], KEYWORD_CHARSET));
            }
        } catch (UnsupportedEncodingException e) {
            // Raised only when the runtime does not support KEYWORD_CHARSET;
            // in that case no keyword can be encoded, so the crawl stops here.
            e.printStackTrace();
        }
    }

    /**
     * Crawls the video links and thumbnail images of a single result page.
     *
     * <p>Every {@code <span>} inside a {@code <div class="video">} is treated
     * as one result entry. A parse failure aborts the remainder of the page
     * and is reported on standard error.
     *
     * @param url URL of the result page to fetch
     */
    public static void childrenParse(String url) {
        String page = HttpUtil.request(url, Constant.CHARSET_GB2312);
        if (page == null) {
            return;
        }
        try {
            NodeList videoDivs = parseFragment(page, FILTER_DIV_VIDEO);
            if (videoDivs == null || videoDivs.size() == 0) {
                return;
            }
            NodeList spans = parseFragment(videoDivs.toHtml(), FILTER_DIV_SPAN);
            if (spans == null) {
                return;
            }
            // Running number printed after each link; restarts at 1 for every page.
            int linkCounter = 1;
            for (int i = 0; i < spans.size(); i++) {
                linkCounter = processSpan(spans.elementAt(i), linkCounter);
            }
        } catch (ParserException e) {
            e.printStackTrace();
        }
    }

    /**
     * Requests the result pages of one search URL by appending
     * {@code &startat=<offset>} for {@link #MAX_PAGES} consecutive offsets.
     *
     * @param url search URL without the {@code startat} parameter
     */
    public static void pages(String url) {
        for (int pageIndex = 0; pageIndex < MAX_PAGES; pageIndex++) {
            childrenParse(url + "&startat=" + (START_OFFSET_STEP * pageIndex));
        }
    }

    public static void main(String[] args) {
        channelParse("http://www.56.com/w/show_channel.phtml");
    }

    /** Parses an HTML fragment and returns all nodes accepted by the filter. */
    private static NodeList parseFragment(String html, NodeFilter filter) throws ParserException {
        return Parser.createParser(html, Constant.CHARSET_GB2312).parse(filter);
    }

    /**
     * Handles one result entry: parses the videos behind its links, attaches
     * the thumbnail images and saves the record when a video could be parsed.
     *
     * @param spanNode    the {@code <span>} node of the entry
     * @param linkCounter running number to print after the next link
     * @return the running number to use for the next link
     */
    private static int processSpan(Node spanNode, int linkCounter) throws ParserException {
        VideoInformation videoInfo = null;
        NodeList links = parseFragment(spanNode.toHtml(), FILTER_DIV_A);
        if (links != null) {
            for (int i = 0; i < links.size(); i++) {
                Node node = links.elementAt(i);
                // Guard the cast the same way the image handling does.
                if (!(node instanceof LinkTag)) {
                    continue;
                }
                String href = ((LinkTag) node).getLink();
                System.out.println("==============" + href + (linkCounter++) + "==================");
                VideoInformation parsed = VideoInfoParser.parse(href);
                if (parsed != null) {
                    parsed.setFlashAddress(href);
                    // Keep the most recent successful result; a later link that
                    // cannot be parsed must not discard it.
                    videoInfo = parsed;
                }
            }
        }
        if (videoInfo != null) {
            attachImages(spanNode, videoInfo);
            VideoInfoService.save(videoInfo);
        }
        return linkCounter;
    }

    /**
     * Downloads every image of the entry and records the original and the
     * new image address on the video record; the last image wins. When a
     * download fails the new address is set to the empty string.
     */
    private static void attachImages(Node spanNode, VideoInformation videoInfo) throws ParserException {
        NodeList images = parseFragment(spanNode.toHtml(), FILTER_DIV_IMG);
        if (images == null) {
            return;
        }
        for (int i = 0; i < images.size(); i++) {
            Node node = images.elementAt(i);
            if (!(node instanceof ImageTag)) {
                continue;
            }
            String imageUrl = ((ImageTag) node).getImageURL();
            videoInfo.setSoureUrl(imageUrl);
            System.out.println("图片地址:" + imageUrl);
            try {
                String newImg = FileUtil.download(imageUrl);
                videoInfo.setNewUrl(newImg);
                System.out.println("图片新地址:" + newImg);
            } catch (DownloadException e) {
                videoInfo.setNewUrl("");
                e.printStackTrace();
            }
        }
    }
}
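// Keyboard shortcuts (code-viewer page residue; not part of the program):
//   Copy code:           Ctrl + C
//   Search code:         Ctrl + F
//   Full-screen mode:    F11
//   Switch theme:        Ctrl + Shift + D
//   Show shortcuts:      ?
//   Increase font size:  Ctrl + =
//   Decrease font size:  Ctrl + -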