⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 totalalbumaddr.java

📁 模拟的土豆网视频网站
💻 JAVA
字号:
package cn.myvideosite.exe.parser;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;

import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

import cn.myvideosite.commons.Constant;
import cn.myvideosite.util.HttpUtil;

import java.io.UnsupportedEncodingException;
import java.net.URLEncoder;

/**
 * Command-line crawler that walks the channel list page of 56.com, builds an
 * album search URL for each channel name, pages through the search results and
 * hands every album link it finds to {@code AlbumInfoParser.parse}.
 *
 * <p>All pages are requested through {@code HttpUtil.request} with
 * {@code Constant.CHARSET_GB2312}; progress is reported on standard output.
 */
public class TotalAlbumAddr {

    /** Matches {@code <p class="tab">} elements (the channel tabs on the channel page). */
    private static final NodeFilter FILTER_P_TAB =
            new AndFilter(new TagNameFilter("p"), new HasAttributeFilter("class", "tab"));

    /** Matches {@code <a>} elements. */
    private static final NodeFilter FILTER_A = new TagNameFilter("a");

    /** Matches {@code <div class="albumimg">} elements on a search result page. */
    private static final NodeFilter FILTER_DIV_ALBUMIMG =
            new AndFilter(new TagNameFilter("div"), new HasAttributeFilter("class", "albumimg"));

    /** Matches {@code <span>} elements, e.g. {@code <span id=s_Mzc5NTUxMzQ>}. */
    private static final NodeFilter FILTER_SPAN = new TagNameFilter("span");

    /** Album search URL prefix; the encoded channel name is appended as the {@code key} value. */
    private static final String INDEX_URL = "http://so.56.com/index?type=album&key=";

    /** Character encoding used to URL-encode the channel name in the search URL. */
    private static final String KEY_ENCODING = "GBK";

    /**
     * Index of the first channel link that is crawled; earlier links are skipped.
     * NOTE(review): the original inline comment next to this value reads "health",
     * so it looks like a manual resume point at that channel - confirm before
     * changing, and set to 0 to crawl every channel.
     */
    private static final int FIRST_CHANNEL_INDEX = 104;

    /** Highest result-page index requested per channel (inclusive, so 51 pages in total). */
    private static final int LAST_PAGE_INDEX = 50;

    /** Increment of the {@code startat} query parameter between consecutive result pages. */
    private static final int PAGE_STEP = 10;

    /**
     * Crawls all child links of the channel page: every {@code <a>} inside a
     * {@code <p class="tab">} element, starting at {@link #FIRST_CHANNEL_INDEX},
     * is turned into an album search URL that is passed to {@link #pages(String)}.
     *
     * <p>Nothing happens when the page request returns {@code null} or when no
     * matching tab or link is found.
     *
     * @param url address of the channel list page
     */
    private static void channelParse(String url) {
        String page = HttpUtil.request(url, Constant.CHARSET_GB2312);
        if (page == null) {
            return;
        }
        try {
            NodeList tabs = select(page, FILTER_P_TAB);
            if (isEmpty(tabs)) {
                return;
            }
            NodeList channelLinks = select(tabs.toHtml(), FILTER_A);
            if (isEmpty(channelLinks)) {
                return;
            }
            for (int i = FIRST_CHANNEL_INDEX; i < channelLinks.size(); i++) {
                LinkTag channelLink = (LinkTag) channelLinks.elementAt(i);
                String channelName = channelLink.getLinkText();
                // Build the search URL once and reuse it for logging and crawling.
                String searchUrl = INDEX_URL + URLEncoder.encode(channelName, KEY_ENCODING);
                System.out.println(searchUrl);
                pages(searchUrl);
                System.out.println("****************" + channelName + "下载完毕!!" + "******************");
            }
        } catch (ParserException e) {
            System.err.println("Failed to parse channel page: " + url);
            e.printStackTrace();
        } catch (UnsupportedEncodingException e) {
            System.err.println("Unsupported encoding for channel names: " + KEY_ENCODING);
            e.printStackTrace();
        }
    }

    /**
     * Fetches one result page of one channel and processes the album links on it.
     * Links are looked up as {@code <a>} inside {@code <span>} inside
     * {@code <div class="albumimg">}; each link is printed together with a running
     * counter and, when non-null, passed to {@code AlbumInfoParser.parse}.
     *
     * <p>Nothing happens when the page request returns {@code null} or when the
     * page contains no matching elements.
     *
     * @param url address of a single search result page
     */
    public static void childrenParse(String url) {
        String page = HttpUtil.request(url, Constant.CHARSET_GB2312);
        if (page == null) {
            return;
        }
        int n = 1; // running number of the link within this page, used in the log line only
        try {
            NodeList albums = select(page, FILTER_DIV_ALBUMIMG);
            if (isEmpty(albums)) {
                return;
            }
            NodeList spans = select(albums.toHtml(), FILTER_SPAN);
            if (isEmpty(spans)) {
                return;
            }
            for (int i = 0; i < spans.size(); i++) {
                Node span = spans.elementAt(i);
                NodeList links = select(span.toHtml(), FILTER_A);
                if (isEmpty(links)) {
                    continue;
                }
                for (int j = 0; j < links.size(); j++) {
                    LinkTag link = (LinkTag) links.elementAt(j);
                    String albumUrl = link.getLink();
                    System.out.println("==============" + albumUrl + (n++) + "==================");
                    if (albumUrl != null) {
                        AlbumInfoParser.parse(albumUrl);
                    }
                }
            }
        } catch (ParserException e) {
            System.err.println("Failed to parse result page: " + url);
            e.printStackTrace();
        }
    }

    /**
     * Collects all album links of one channel by requesting result pages
     * {@code 0..LAST_PAGE_INDEX}, appending {@code &startat=<PAGE_STEP * index>}
     * to the given URL and passing each to {@link #childrenParse(String)}.
     *
     * <p>All pages are always requested; the loop does not stop early when a
     * page yields no links.
     *
     * @param url search URL of the channel, without the {@code startat} parameter
     */
    public static void pages(String url) {
        for (int pageIndex = 0; pageIndex <= LAST_PAGE_INDEX; pageIndex++) {
            childrenParse(url + "&startat=" + PAGE_STEP * pageIndex);
        }
    }

    /** Parses the given HTML fragment and returns the nodes accepted by the filter. */
    private static NodeList select(String html, NodeFilter filter) throws ParserException {
        Parser parser = Parser.createParser(html, Constant.CHARSET_GB2312);
        return parser.parse(filter);
    }

    /** Returns {@code true} when the list is {@code null} or contains no nodes. */
    private static boolean isEmpty(NodeList nodes) {
        return nodes == null || nodes.size() == 0;
    }

    /**
     * Entry point: crawls the 56.com channel list page.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        channelParse("http://www.56.com/w/show_channel.phtml");
    }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -