⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 albuminfoparser.java

📁 模拟的土豆网视频网站
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
package cn.myvideosite.exe.parser;

import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.InputTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

import cn.myvideosite.commons.Constant;
import cn.myvideosite.data.model.bean.AlbumInfo;
import cn.myvideosite.data.model.bean.AlbumType;
import cn.myvideosite.data.model.bean.KeyWord;
import cn.myvideosite.data.model.bean.UserInfo;
import cn.myvideosite.data.model.bean.VideoInformation;
import cn.myvideosite.data.model.services.AlbumInfoService;
import cn.myvideosite.data.model.services.AlbumTypeService;
import cn.myvideosite.util.HttpUtil;
import cn.myvideosite.util.MySuperDate;

import net.sf.json.JSONArray;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;

public class AlbumInfoParser {

	/**
	 * Matches {@code <div class="sr">}. {@code parse(String)} applies this to the
	 * whole page and then pulls the {@code <li>} entries out of the result.
	 * Sample page for the markup being parsed:
	 * http://www.56.com/w98/album-aid-6563926.html
	 */
	 private static final NodeFilter FILTER_DIV_SR=
		     new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("class","sr"));	
	 /**
	  * Matches any {@code <li>} tag; used on the HTML of the {@code sr} div to
	  * obtain the list items that {@code parse(String)} reads by index.
	  */
	 private static final NodeFilter FILTER_LI=new TagNameFilter("li");
	 /**
	  * Matches any {@code <a>} tag (title link, category link, keyword links).
	  */
	 private static final NodeFilter FILTER_A=new TagNameFilter("a");
	 /**
	  * Matches any {@code <span>} tag (video count and creation time items).
	  */
	 private static final NodeFilter FILTER_SPAN=new TagNameFilter("span");
	 /**
	  * Matches {@code <div class="fullContent">}; applied to the seventh list
	  * item, which the parser treats as the album introduction.
	  */
	 private static final NodeFilter FILTER_DIV_FULLCONTENT=
	     new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("class","fullContent"));	 	
	 /**
	  * Matches {@code <p class="albumUrl">}. Note that despite the DIV in the
	  * constant's name, the tag being matched is {@code <p>}.
	  */
	 private static final NodeFilter FILTER_DIV_ALBUMURL=
	     new AndFilter(new TagNameFilter("p"),new HasAttributeFilter("class","albumUrl"));	
	 /**
	  * Matches {@code <div class="sl">}.
	  */
	 private static final NodeFilter FILTER_DIV_SL=
	     new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("class","sl"));	
	 /**
	  * Matches any {@code <img>} tag.
	  */
	 private static final NodeFilter FILTER_IMG=new TagNameFilter("img");		 		
	 /**
	  * Matches any {@code <dt>} tag.
	  */
	 private static final NodeFilter FILTER_DT=new TagNameFilter("dt");
	 /**
	  * Matches the {@code <title>} tag; {@code parse(String)} reads the page title
	  * through it to detect a page that should be skipped.
	  */
	 private static final NodeFilter FILTER_TITLE=new TagNameFilter("title");
	 
	 // Mutable class-wide counter starting at 1.
	 // NOTE(review): not referenced in the visible part of this file - confirm its use (and thread-safety) in the remainder.
	 private static int COUNT=1;
	 
     public static AlbumInfo  parse(String url){
    	  AlbumInfo albuminfo = AlbumInfoService.findByAlbumAddr(url);
    	  if( albuminfo != null) return albuminfo;
    	  
    	  if(url.equals("http://www.56.com/w26/album-aid-145530.html")) { return null; }  // 乱码
    	/*  if(url.equals("http://www.56.com/w98/album-aid-3677122.html")) { return null; }  
    	  if(url.equals("http://www.56.com/w31/album-aid-5170920.html")) { return null; }  
    	  if(url.equals("http://www.56.com/w11/album-aid-572537.html")) { return null; } 
    	  if(url.equals("http://www.56.com/w76/album-aid-1486454.html")) { return null; } 
    	  if(url.equals("http://www.56.com/w96/album-aid-145422.html")) { return null; } 
    	  if(url.equals("http://www.56.com/w97/album-aid-180489.html")) { return null; } */
    	  
		  String page=HttpUtil.request(url,  Constant.CHARSET_GB2312);		  		 
		  if(page != null){
		  if(page.equals("wfabc")){ return null;}
		  Parser pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);		  		
		  try {
			NodeList titleNL=pageParser.parse(FILTER_TITLE);
			if(titleNL != null && titleNL.size()>0){
				String str2=titleNL.elementAt(0).getChildren().elementAt(0).getText(); 
				if( str2.equals("大学生 - 56.com - 全国最大的免费视频分享平台")) { return null; }
			}
						
			albuminfo=new AlbumInfo();
			pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);	
			NodeList nl=pageParser.parse(FILTER_DIV_SR);			
			pageParser=Parser.createParser(nl.toHtml(), Constant.CHARSET_GB2312);
			NodeList liNL=pageParser.parse(FILTER_LI);							
			List<KeyWord> keywordList=new ArrayList<KeyWord>();
			if(liNL!=null && liNL.size()>0 ){					   
					Node liNode =  liNL.elementAt(0);                              //标题        		
                    pageParser=Parser.createParser(liNode.toHtml(), Constant.CHARSET_GB2312);
                    NodeList aNL=pageParser.parse(FILTER_A);
                    if(aNL!=null && aNL.size()>0){                   	
                    	LinkTag  aLink=(LinkTag) aNL.elementAt(0); 
                    	if(aLink != null){
                    	   albuminfo.setAlbumTitle(aLink.getLinkText());               //标题名称                  
                    	   albuminfo.setFlashUrl(aLink.getLink());                //标题连接 即专辑的flash地址  
                    	}
System.out.println("标题:"+aLink.getLinkText());
System.out.println("flashURL:"+aLink.getLink());
                    }                  
                    Node liNode2 =  liNL.elementAt(1);
                    pageParser=Parser.createParser(liNode2.toHtml(), Constant.CHARSET_GB2312); //视频数    liNode2.getFirstChild().getText()
                    NodeList spanNL=pageParser.parse(FILTER_SPAN);
                    if(spanNL!=null && spanNL.size()>0){
                    	Node spanNode=spanNL.elementAt(0);
                    	if( spanNode != null)
                    	    albuminfo.setVideoNub(Integer.parseInt(spanNode.getFirstChild().getText()));
                    	else 
                    		albuminfo.setVideoNub(0);
//System.out.println("视频数:"+Integer.parseInt(spanNode.getFirstChild().getText()));	
                    }
                    Node liNode4 =  liNL.elementAt(3);            // 类别                         
                    pageParser=Parser.createParser(liNode4.toHtml(), Constant.CHARSET_GB2312);
                    NodeList a2NL=pageParser.parse(FILTER_A);
                    if(a2NL!=null && a2NL.size()>0){
                    	LinkTag link=(LinkTag) a2NL.elementAt(0);
 //System.out.println("类别:"+link.getLinkText());
                        AlbumType albumtype=AlbumTypeService.findByAlbumName(link.getLinkText()); 
                        if(albumtype != null) 
                        	albuminfo.setAlbumTypeId(albumtype.getTypeId());     //保存 类别id                         
                        else 
                        	albuminfo.setAlbumTypeId(0);                       	                        
                    }                   
                    Node liNode5 =  liNL.elementAt(4);      //关键词         liNode5.getFirstChild().getText()   
                    pageParser=Parser.createParser(liNode5.toHtml(), Constant.CHARSET_GB2312);
                    NodeList a3NL=pageParser.parse(FILTER_A);                    
                    if(a3NL!=null && a3NL.size()>0){
                    	for(int i=0;i<a3NL.size();i++){
                    	LinkTag  a3Link=(LinkTag) a3NL.elementAt(i); 
 //System.out.println("关键词:"+ a3Link.getLinkText());        
                    	KeyWord keyword=new KeyWord();
                    	if(keyword != null){
                    		keyword.setKeyName(a3Link.getLinkText());
                        	keywordList.add(keyword);        
                    	}                   	            	
                       }                   	                    	
                    }                   
                    Node liNode6 =  liNL.elementAt(5);      //创建时间  liNode6.getFirstChild().getText()
                    pageParser=Parser.createParser(liNode6.toHtml(), Constant.CHARSET_GB2312);
                    NodeList apanNL=pageParser.parse(FILTER_SPAN);
                    if(apanNL!=null && apanNL.size()>0){                   	
                    	Node  spanNode= apanNL.elementAt(0);  
                    	if(spanNode != null)
                    	   albuminfo.setCreateTime(new MySuperDate(spanNode.getFirstChild().getText()).getDate());
                    	else
                    	   albuminfo.setCreateTime(new Date());
 //System.out.println("上传时间:"+ spanNode.getFirstChild().getText());                    	
                    }                                      
                    Node liNode7 =  liNL.elementAt(6);           //by yuxiong  专辑介绍
                    pageParser=Parser.createParser(liNode7.toHtml(), Constant.CHARSET_GB2312);
                    NodeList fullcontentNL=pageParser.parse(FILTER_DIV_FULLCONTENT);                               

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -