⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 videoinfoparser.java

📁 模拟的土豆网视频网站
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
package cn.myvideosite.exe.parser;

import java.util.Date;

import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.InputTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

import cn.myvideosite.commons.Constant;
import cn.myvideosite.data.model.bean.ChannelClass;
import cn.myvideosite.data.model.bean.UserInfo;
import cn.myvideosite.data.model.bean.VideoInformation;
import cn.myvideosite.data.model.services.ChannelService;
import cn.myvideosite.data.model.services.VideoInfoService;
import cn.myvideosite.util.HttpUtil;
import cn.myvideosite.util.MySuperDate;

public class VideoInfoParser {

	/**
	 * Parses video information.
	 * Disabled filter below: the video-info container {@code <div id="videoInfo_con">}.
	 */
	/* private static final NodeFilter FILTER_DIV_VIDEOINFO=
	     new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("id","videoInfo_con"));*/
	 /**
	  * Matches the {@code <dl class="uInfo">} element that carries the uploader
	  * (user) information of a video. Despite the {@code DIV} in the constant's
	  * name, the tag matched is {@code dl}.
	  * <p>
	  * {@code parse} takes the first link inside each match as the uploader's
	  * space URL and the fourth {@code <span>} as the upload time.
	  */
	private static final NodeFilter FILTER_DIV_UINFO=
	     new AndFilter(new TagNameFilter("dl"),new HasAttributeFilter("class","uInfo"));
	 /**
	  * Matches the {@code <ul class="vInfo">} element that carries the video
	  * information.
	  * <p>
	  * {@code parse} reads the channel from the link in its second {@code <li>}
	  * and the video introduction from its fourth {@code <li>}.
	  */
	private static final NodeFilter FILTER_UL_VINFO=
	     new AndFilter(new TagNameFilter("ul"),new HasAttributeFilter("class","vInfo"));
	 /**
	  * Matches every {@code <span>} tag. {@code parse} applies it to the
	  * uploader-info fragment to locate the upload time.
	  */
	private static final NodeFilter FILTER_SPAN=new TagNameFilter("span");	
	 /**
	  * Matches every {@code <li>} tag. {@code parse} applies it to the
	  * video-info list fragment to split it into its items.
	  */
	private static final NodeFilter FILTER_LI=new TagNameFilter("li");	
	 
	 /**
	  * Matches every {@code <a>} tag. {@code parse} uses it to pull the first
	  * link out of a fragment (the uploader's space link and the channel link).
	  */
	private static final NodeFilter FILTER_A=new TagNameFilter("a");	
	/**
	 * Matches the {@code <title>} tag of a page.
	 * <p>
	 * {@code parse} compares the title text against a set of known titles to
	 * decide whether the page is skipped and which page layout to parse.
	 */
	private static final NodeFilter FILTER_TITLE=new TagNameFilter("title");		
	/**
	 * Matches {@code <div class="mid clearfix">}, the container scraped for the
	 * female-anchor ("beauty host") information.
	 * <p>
	 * The original note also mentions {@code <div class="uinfo clearfix">} and
	 * {@code <div class="morevinfo">}; presumably these are nested inside the
	 * matched container - TODO confirm against the page markup.
	 */
	private static final NodeFilter FILTER_DIV_MIDCLEAR=
	     new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("class","mid clearfix"));
	/**
	 * Matches every {@code <p>} tag.
	 */
	private static final NodeFilter FILTER_P=new TagNameFilter("p");
	/**
	 * Matches {@code <div class="morevinfo">}, the element holding the video
	 * introduction.
	 */
	private static final NodeFilter FILTER_DIV_MOREVINFO=
	     new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("class","morevinfo"));
	/**
	 * Matches every {@code <input>} tag.
	 * <p>
	 * The original note points at {@code <input id="input_page_url" ...>};
	 * presumably that field is the one of interest - its use is not visible
	 * in this part of the file, TODO confirm.
	 */
	private static final NodeFilter FILTER_INPUT=new TagNameFilter("input");
	/**
	 * Matches {@code <div class="albumimg">}, the element holding the
	 * information about albums related to the video.
	 */
	private static final NodeFilter FILTER_DIV_ALBUMIMG=
		  new  AndFilter (new TagNameFilter("div"),new HasAttributeFilter("class","albumimg"));
	/**
	 * Disabled filter below: every {@code <img>} tag.
	 */
	//private static final NodeFilter FILTER_IMG=new TagNameFilter("img");
	/**
	 * Matches {@code <p class="more">}.
	 */
	private static final NodeFilter FILTER_P_MORE=
		  new  AndFilter (new TagNameFilter("p"),new HasAttributeFilter("class","more"));
	
	/**
	 * Matches every {@code <h1>} tag. On pages that do not use the standard
	 * video-page title, {@code parse} takes the plain text of the last child
	 * of the first {@code <h1>} as the video title.
	 */
	private static final NodeFilter FILTER_H1=new TagNameFilter("h1");
	
	/**
	 * Matches every {@code <h2>} tag. {@code parse} compares the text of the
	 * first {@code <h2>} with a few known headings (director video info and
	 * similar non-video pages) and returns {@code null} for those pages.
	 */
	private static final NodeFilter FILTER_H2=new TagNameFilter("h2");
	
	/**
	 * Disabled filter below: {@code <dl class="uinfo">}.
	 */
	/*private static final NodeFilter FILTER_P_CONTENT=
		  new  AndFilter (new TagNameFilter("dl"),new HasAttributeFilter("class","uinfo"));*/
	/**
	 * Disabled filter below: {@code <div class="vinfo">} (the original note
	 * also mentions its {@code <dd>} children).
	 */
	/*private static final NodeFilter FILTER_DIV_VINFO=
		  new  AndFilter (new TagNameFilter("div"),new HasAttributeFilter("class","vinfo"));*/
	
	/**
	 * Disabled filter below: the {@code <dd>} tags of the director-info section.
	 */
	//private static final NodeFilter FILTER_DD=new TagNameFilter("dd");
	/**
	 * Matches {@code <div class="title3">}, the element holding the video title.
	 */
	private static final NodeFilter FILTER_DIV_TITLE3=
		  new  AndFilter (new TagNameFilter("div"),new HasAttributeFilter("class","title3"));
	
	public static VideoInformation parse(String url){
		VideoInformation videoinfo = VideoInfoService.findByFlashAddr(url);
		if( videoinfo != null) return videoinfo; 
		
		String  page=HttpUtil.request(url,  Constant.CHARSET_GB2312);
		
		if(page !=null){		
		Parser pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);		
		try { 
			NodeList titleNL=pageParser.parse(FILTER_TITLE);			//判断标题是否为 56网  .........
			String str=titleNL.elementAt(0).getChildren().elementAt(0).getText();
//System.out.println(str);
			
			pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);           //判断是否为导演信息 或是雷区	
			NodeList h2NL=pageParser.parse(FILTER_H2); 
            if(h2NL !=null && h2NL.size()>0){
            	String str1=h2NL.elementAt(0).getChildren().elementAt(0).getText();	
            	if(str1.equals("导演视频信息") || str1.equals("今天你被雷到了吗?") || str1.equals("用户评论"))  
            	{return null;}
//System.out.println(str1);
			}
								
			if(str.equals("56网")  || str.equals("56") || str==null || str.equals("56tv - 播放") || str.equals("白领公寓[21].56tv - 播放"))    {return null;}			
			
			 videoinfo = new VideoInformation();
			
			if(str.equals("56网视频") || str.equals("56.com 视频")){
		    pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);	
			NodeList uinfoNL=pageParser.parse(FILTER_DIV_UINFO);	
				
			titleParse( page , videoinfo);            //保存 标题
			
			if(uinfoNL !=null && uinfoNL.size()>0){
				for(int i=0;i<uinfoNL.size();i++){
					Node uinfoNode=uinfoNL.elementAt(i);						
				    pageParser=Parser.createParser(uinfoNode.toHtml(), Constant.CHARSET_GB2312);				    
				    NodeList aNL=pageParser.parse(FILTER_A);   
				    if(aNL !=null && aNL.size()>0){	
				            LinkTag link=(LinkTag) aNL.elementAt(0);
System.out.println("会员空间地址:"+link.getLink());                                            
                           UserInfo userInfo=UserInfoParser.parse(link.getLink());
                           if(userInfo !=null ){//会员空间地址        
                        	   videoinfo.setUserId(userInfo.getUserId());        
                           } else{
                        	   videoinfo.setUserId(0);   
                           }
				    }
				    pageParser=Parser.createParser(uinfoNode.toHtml(), Constant.CHARSET_GB2312);	
				    NodeList spanNL=pageParser.parse(FILTER_SPAN);   //上传时间
				    if(spanNL !=null && spanNL.size()>0){				    	
				    		Node spanNode=spanNL.elementAt(3);
				    		if(spanNode != null){
				    		  videoinfo.setUploadTime(new MySuperDate(spanNode.getFirstChild().getText(), false).getDate());
 System.out.println("上传时间:"+spanNode.getFirstChild().getText());
				    		}else{
				    			videoinfo.setUploadTime(new Date());
				    		}
				    }				    
				}				
			}			
		    pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
			NodeList vinfoNL=pageParser.parse(FILTER_UL_VINFO);
			if(vinfoNL !=null && vinfoNL.size()>0 ){
				for(int i=0;i<vinfoNL.size();i++){
					Node vinfoNode=vinfoNL.elementAt(i);	
					pageParser=Parser.createParser(vinfoNode.toHtml(), Constant.CHARSET_GB2312);
					NodeList liNL=pageParser.parse(FILTER_LI);
					
					 if(liNL !=null && liNL.size()>0 ){						 						
							 Node liNode=liNL.elementAt(1);     // 频道
							 if(liNode !=null && !liNode.equals("")){
							 pageParser=Parser.createParser(liNode.toHtml(), Constant.CHARSET_GB2312);	 // ?								 
							 NodeList aNL=pageParser.parse(FILTER_A);  
						     if(aNL !=null && aNL.size()>0){								
						    		LinkTag link=(LinkTag) aNL.elementAt(0);
System.out.println("频道:"+link.getLinkText());								
									ChannelClass channel = ChannelService.findByChannelName(link.getLinkText());
                                    if(channel != null){
                                         videoinfo.setChannelId(channel.getChannelId());
                                    } else{
                                    	 videoinfo.setChannelId(0);
                                    }                  
						     }
							}
							 Node liNode3=liNL.elementAt(3);       //视频简介								 
							 if(liNode3 !=null && !liNode3.equals("")){
		                       videoinfo.setIntroduction(liNode3.getChildren().elementAt(1).getText());
 System.out.println("视频简介:"+liNode3.getChildren().elementAt(1).getText());	
							 }   
					 }
				}
			}		
		}else{
			pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);		
			NodeList h1NL=pageParser.parse(FILTER_H1);
			if(h1NL != null && h1NL.size()>0){
				Node h1Node = h1NL.elementAt(0);
				if( h1NL != null )
					videoinfo.setVideoTitle(h1Node.getLastChild().toPlainTextString());
System.out.println("标题:"+h1Node.getLastChild().toPlainTextString());
			}
					
			pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);			
			NodeList midClearNL=pageParser.parse(FILTER_DIV_MIDCLEAR);
			

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -