📄 albuminfoparser.java
字号:
package cn.myvideosite.exe.parser;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.ImageTag;
import org.htmlparser.tags.InputTag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import cn.myvideosite.commons.Constant;
import cn.myvideosite.data.model.bean.AlbumInfo;
import cn.myvideosite.data.model.bean.AlbumType;
import cn.myvideosite.data.model.bean.KeyWord;
import cn.myvideosite.data.model.bean.UserInfo;
import cn.myvideosite.data.model.bean.VideoInformation;
import cn.myvideosite.data.model.services.AlbumInfoService;
import cn.myvideosite.data.model.services.AlbumTypeService;
import cn.myvideosite.util.HttpUtil;
import cn.myvideosite.util.MySuperDate;
import net.sf.json.JSONArray;
import net.sf.json.JSONException;
import net.sf.json.JSONObject;
public class AlbumInfoParser {
/**
 * Matches {@code <div class="sr">} elements.
 * <p>
 * {@code parse} runs this filter over the whole downloaded page and then
 * re-parses the matched HTML to look for the {@code <li>} items inside it.
 * Example album page given by the original author as a reference:
 * http://www.56.com/w98/album-aid-6563926.html
 */
private static final NodeFilter FILTER_DIV_SR=
new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("class","sr"));
/**
 * Matches every {@code <li>} element.
 * <p>
 * {@code parse} applies it to the HTML of the {@code <div class="sr">} block
 * and then reads the resulting list items by fixed index.
 */
private static final NodeFilter FILTER_LI=new TagNameFilter("li");
/**
 * Matches every {@code <a>} element.
 * <p>
 * {@code parse} uses it on individual list items to pick out the title link,
 * the category link and the keyword links.
 */
private static final NodeFilter FILTER_A=new TagNameFilter("a");
/**
 * Matches every {@code <span>} element.
 * <p>
 * {@code parse} uses it on individual list items to read the video count and
 * the creation time from the first matching span.
 */
private static final NodeFilter FILTER_SPAN=new TagNameFilter("span");
/**
 * Matches {@code <div class="fullContent">} elements.
 * <p>
 * {@code parse} applies it to the seventh list item, which the code labels as
 * the album introduction.
 */
private static final NodeFilter FILTER_DIV_FULLCONTENT=
new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("class","fullContent"));
/**
 * Matches {@code <p class="albumUrl">} elements.
 * <p>
 * NOTE(review): despite the {@code DIV} in the constant's name, the filter
 * matches a {@code <p>} tag, not a {@code <div>}.
 */
private static final NodeFilter FILTER_DIV_ALBUMURL=
new AndFilter(new TagNameFilter("p"),new HasAttributeFilter("class","albumUrl"));
/**
 * Matches {@code <div class="sl">} elements.
 * <p>
 * NOTE(review): where this filter is applied is not visible in this part of
 * the file; confirm against the rest of the parser.
 */
private static final NodeFilter FILTER_DIV_SL=
new AndFilter(new TagNameFilter("div"),new HasAttributeFilter("class","sl"));
/**
 * Matches every {@code <img>} element.
 * <p>
 * NOTE(review): where this filter is applied is not visible in this part of
 * the file; confirm against the rest of the parser.
 */
private static final NodeFilter FILTER_IMG=new TagNameFilter("img");
/**
 * Matches every {@code <dt>} element.
 * <p>
 * NOTE(review): where this filter is applied is not visible in this part of
 * the file; confirm against the rest of the parser.
 */
private static final NodeFilter FILTER_DT=new TagNameFilter("dt");
/**
 * Matches the {@code <title>} element.
 * <p>
 * {@code parse} reads the text of the first match and returns {@code null}
 * when it equals one specific page title, skipping that page.
 */
private static final NodeFilter FILTER_TITLE=new TagNameFilter("title");
/**
 * Mutable class-wide counter, starting at 1.
 * <p>
 * NOTE(review): it is not a constant despite the upper-case name, and it is
 * not touched in the visible part of {@code parse}; presumably it is updated
 * further down the file. Confirm its purpose and whether concurrent callers
 * can race on it.
 */
private static int COUNT=1;
public static AlbumInfo parse(String url){
AlbumInfo albuminfo = AlbumInfoService.findByAlbumAddr(url);
if( albuminfo != null) return albuminfo;
if(url.equals("http://www.56.com/w26/album-aid-145530.html")) { return null; } // 乱码
/* if(url.equals("http://www.56.com/w98/album-aid-3677122.html")) { return null; }
if(url.equals("http://www.56.com/w31/album-aid-5170920.html")) { return null; }
if(url.equals("http://www.56.com/w11/album-aid-572537.html")) { return null; }
if(url.equals("http://www.56.com/w76/album-aid-1486454.html")) { return null; }
if(url.equals("http://www.56.com/w96/album-aid-145422.html")) { return null; }
if(url.equals("http://www.56.com/w97/album-aid-180489.html")) { return null; } */
String page=HttpUtil.request(url, Constant.CHARSET_GB2312);
if(page != null){
if(page.equals("wfabc")){ return null;}
Parser pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
try {
NodeList titleNL=pageParser.parse(FILTER_TITLE);
if(titleNL != null && titleNL.size()>0){
String str2=titleNL.elementAt(0).getChildren().elementAt(0).getText();
if( str2.equals("大学生 - 56.com - 全国最大的免费视频分享平台")) { return null; }
}
albuminfo=new AlbumInfo();
pageParser=Parser.createParser(page, Constant.CHARSET_GB2312);
NodeList nl=pageParser.parse(FILTER_DIV_SR);
pageParser=Parser.createParser(nl.toHtml(), Constant.CHARSET_GB2312);
NodeList liNL=pageParser.parse(FILTER_LI);
List<KeyWord> keywordList=new ArrayList<KeyWord>();
if(liNL!=null && liNL.size()>0 ){
Node liNode = liNL.elementAt(0); //标题
pageParser=Parser.createParser(liNode.toHtml(), Constant.CHARSET_GB2312);
NodeList aNL=pageParser.parse(FILTER_A);
if(aNL!=null && aNL.size()>0){
LinkTag aLink=(LinkTag) aNL.elementAt(0);
if(aLink != null){
albuminfo.setAlbumTitle(aLink.getLinkText()); //标题名称
albuminfo.setFlashUrl(aLink.getLink()); //标题连接 即专辑的flash地址
}
System.out.println("标题:"+aLink.getLinkText());
System.out.println("flashURL:"+aLink.getLink());
}
Node liNode2 = liNL.elementAt(1);
pageParser=Parser.createParser(liNode2.toHtml(), Constant.CHARSET_GB2312); //视频数 liNode2.getFirstChild().getText()
NodeList spanNL=pageParser.parse(FILTER_SPAN);
if(spanNL!=null && spanNL.size()>0){
Node spanNode=spanNL.elementAt(0);
if( spanNode != null)
albuminfo.setVideoNub(Integer.parseInt(spanNode.getFirstChild().getText()));
else
albuminfo.setVideoNub(0);
//System.out.println("视频数:"+Integer.parseInt(spanNode.getFirstChild().getText()));
}
Node liNode4 = liNL.elementAt(3); // 类别
pageParser=Parser.createParser(liNode4.toHtml(), Constant.CHARSET_GB2312);
NodeList a2NL=pageParser.parse(FILTER_A);
if(a2NL!=null && a2NL.size()>0){
LinkTag link=(LinkTag) a2NL.elementAt(0);
//System.out.println("类别:"+link.getLinkText());
AlbumType albumtype=AlbumTypeService.findByAlbumName(link.getLinkText());
if(albumtype != null)
albuminfo.setAlbumTypeId(albumtype.getTypeId()); //保存 类别id
else
albuminfo.setAlbumTypeId(0);
}
Node liNode5 = liNL.elementAt(4); //关键词 liNode5.getFirstChild().getText()
pageParser=Parser.createParser(liNode5.toHtml(), Constant.CHARSET_GB2312);
NodeList a3NL=pageParser.parse(FILTER_A);
if(a3NL!=null && a3NL.size()>0){
for(int i=0;i<a3NL.size();i++){
LinkTag a3Link=(LinkTag) a3NL.elementAt(i);
//System.out.println("关键词:"+ a3Link.getLinkText());
KeyWord keyword=new KeyWord();
if(keyword != null){
keyword.setKeyName(a3Link.getLinkText());
keywordList.add(keyword);
}
}
}
Node liNode6 = liNL.elementAt(5); //创建时间 liNode6.getFirstChild().getText()
pageParser=Parser.createParser(liNode6.toHtml(), Constant.CHARSET_GB2312);
NodeList apanNL=pageParser.parse(FILTER_SPAN);
if(apanNL!=null && apanNL.size()>0){
Node spanNode= apanNL.elementAt(0);
if(spanNode != null)
albuminfo.setCreateTime(new MySuperDate(spanNode.getFirstChild().getText()).getDate());
else
albuminfo.setCreateTime(new Date());
//System.out.println("上传时间:"+ spanNode.getFirstChild().getText());
}
Node liNode7 = liNL.elementAt(6); //by yuxiong 专辑介绍
pageParser=Parser.createParser(liNode7.toHtml(), Constant.CHARSET_GB2312);
NodeList fullcontentNL=pageParser.parse(FILTER_DIV_FULLCONTENT);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -