⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 dangdangparser.java

📁 本系统实现了从五个网站上搜索的图书进行整合后
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
	* Description:  获得图书出版时间
	* Calls: no
	* Called By:  mainService
	* @param bookElement as Element
	* @return String
	* @throws no
	*/
	public  String getBookPublishTime(Element bookElement){
		// Reads the text of the element's first child, looks for the label
		// "出版时间" followed by "<year>年<month>月", and returns "<year>-<month>-00".
		// Returns null when the element has no children, when the first child
		// carries no text, or when the label/year/month markers are not found.
		if (!bookElement.hasChildNodes()) {
			return null;
		}
		String text = bookElement.getFirstChild().getNodeValue();
		if (text == null) {
			// Element nodes have no node value; previously this caused a NullPointerException.
			return null;
		}
		int labelIdx = text.indexOf("出版时间");
		if (labelIdx == -1) {
			return null;
		}
		// Skip the four-character label plus the single separator character after it.
		int yearStart = labelIdx + 5;
		// Search forward from the label so that an earlier "年"/"月" elsewhere in the
		// text cannot produce an inverted substring range.
		int yearEnd = text.indexOf("年", yearStart);
		if (yearEnd == -1) {
			return null;
		}
		int monthEnd = text.indexOf("月", yearEnd + 1);
		if (monthEnd == -1) {
			return null;
		}
		// The day is not available in the source text, so a fixed "00" is emitted.
		return text.substring(yearStart, yearEnd)
				+ "-"
				+ text.substring(yearEnd + 1, monthEnd)
				+ "-00";
	}
	/**
	 * Extracts the book's sale price from a listing element.
	 *
	 * The price text is read from the first child of the element's third child
	 * node (index 2), provided that third child is itself an element. Currency
	 * signs, spaces and thousands separators are removed from the result.
	 *
	 * @param bookElement element whose third child holds the price
	 * @return the cleaned price text, or an empty string when no price text is found
	 */
	public  String getBookPrice(Element bookElement){
		String bookPrice = "";
		NodeList children = bookElement.getChildNodes();
		if (children.getLength() > 2) {
			Node priceNode = children.item(2);
			if (priceNode.getNodeType() == Node.ELEMENT_NODE && priceNode.hasChildNodes()) {
				String raw = priceNode.getFirstChild().getNodeValue();
				// A nested element has no node value; keep the empty default instead of failing.
				if (raw != null) {
					bookPrice = raw;
				}
			}
		}
		// The original removed the ASCII comma twice, which made the second call a no-op.
		// NOTE(review): the second call presumably targeted the full-width comma — confirm.
		// Half-width and full-width forms of the yen sign and comma are both stripped here.
		return bookPrice
				.replace("¥", "")
				.replace("\uFFE5", "")
				.replace(" ", "")
				.replace(",", "")
				.replace("\uFF0C", "")
				.trim();
	}

	/**
	 * Extracts the discount from a listing element and converts it to "0.xx" form.
	 *
	 * The values of all non-element children are concatenated. Anything up to and
	 * including the label "折扣:" is dropped, and the digits before the unit "折"
	 * are prefixed with "0." (for example "75折" becomes "0.75"). When no "折" is
	 * present the collected text is returned with spaces removed.
	 *
	 * @param bookElement element whose direct non-element children contain the discount text
	 * @return the discount as a decimal string, or the space-stripped text when no unit is found
	 */
	public String getBookDiscount(Element bookElement) {
		StringBuilder collected = new StringBuilder();
		NodeList children = bookElement.getChildNodes();
		for (int i = 0; i < children.getLength(); i++) {
			Node child = children.item(i);
			if (child.getNodeType() == Node.ELEMENT_NODE) {
				continue;
			}
			String value = child.getNodeValue();
			// Skip value-less nodes so the literal "null" never ends up in the result.
			if (value != null) {
				collected.append(value);
			}
		}

		String bookDiscount = collected.toString();
		int labelIdx = bookDiscount.indexOf("折扣:");
		if (labelIdx != -1) {
			bookDiscount = bookDiscount.substring(labelIdx + 3);
		}
		int unitIdx = bookDiscount.indexOf("折");
		if (unitIdx != -1) {
			// A decimal discount such as "7.5" must become "0.75", not "0.7.5".
			String digits = bookDiscount.substring(0, unitIdx).trim().replace(".", "");
			bookDiscount = "0." + digits;
		}
		return bookDiscount.replace(" ", "");
	}
	/**
	 * Extracts the book's list (fixed) price from a listing element.
	 *
	 * The price text is read from the first child of the element's first child,
	 * provided that first child is itself an element. Currency signs and
	 * thousands separators are removed and the result is trimmed.
	 *
	 * @param bookElement element whose first child element wraps the price text
	 * @return the cleaned price text, or an empty string when no price text is found
	 */
	public String getBookFixPrice(Element bookElement) {
		String bookFixPrice = "";
		Node wrapper = bookElement.getFirstChild();
		if (wrapper != null
				&& wrapper.getNodeType() == Node.ELEMENT_NODE
				&& wrapper.hasChildNodes()) {
			String raw = wrapper.getFirstChild().getNodeValue();
			// A nested element has no node value; keep the empty default instead of failing.
			if (raw != null) {
				bookFixPrice = raw;
			}
		}
		// The original removed the ASCII comma twice, which made the second call a no-op.
		// NOTE(review): the second call presumably targeted the full-width comma — confirm.
		// The result is now trimmed unconditionally; previously one-character strings were not.
		return bookFixPrice
				.replace("¥", "")
				.replace("\uFFE5", "")
				.replace(",", "")
				.replace("\uFF0C", "")
				.trim();
	}
	/**
	 * Builds the address of a book's detail page from a listing element.
	 *
	 * The link is taken from the "href" attribute of the element's first child,
	 * which must itself be an element, and is appended to the fixed site prefix.
	 *
	 * @param bookElement element whose first child is expected to be the link
	 * @return the prefixed, trimmed address, or an empty string when the first
	 *         child is missing, is not an element, or has no "href" attribute
	 */
	public String getBookUrl(Element bookElement) {
		Node head = bookElement.hasChildNodes() ? bookElement.getFirstChild() : null;
		if (head == null || head.getNodeType() != Node.ELEMENT_NODE) {
			return "";
		}
		Element link = (Element) head;
		if (!link.hasAttribute("href")) {
			return "";
		}
		String address = "http://search.dangdang.com/" + link.getAttribute("href");
		return address.trim();
	}
	/**
	 * Collects the descriptive text of a book from a listing element.
	 *
	 * For each direct child: an element contributes the value of its own first
	 * child, any other node contributes its own value. Children without a value
	 * contribute nothing. Spaces are removed, and text longer than 255
	 * characters is cut to 255 characters and then trimmed.
	 *
	 * @param bookElement element whose children hold the description text
	 * @return the collected text, at most 255 characters long; never null
	 */
	public String getBookContent(Element bookElement) {
		StringBuilder collected = new StringBuilder();
		NodeList children = bookElement.getChildNodes();
		for (int i = 0; i < children.getLength(); i++) {
			Node child = children.item(i);
			// For element children the text lives one level down, in their first child.
			Node source = (child.getNodeType() == Node.ELEMENT_NODE)
					? child.getFirstChild()
					: child;
			String value = (source == null) ? null : source.getNodeValue();
			// Value-less nodes (e.g. nested elements) used to be appended as the literal "null".
			if (value != null) {
				collected.append(value);
			}
		}

		String bookContent = collected.toString().replace(" ", "");
		if (bookContent.length() > 255) {
			bookContent = bookContent.substring(0, 255).trim();
		}
		return bookContent;
	}
	/**
	 * Finds the address of the next result page in a parsed search page.
	 *
	 * Every div whose id is "divBottomPageNavi" is inspected: the children of
	 * its first child are searched for the first element whose "name" attribute
	 * is "link_page_next", and that element's "href" is appended to the site
	 * prefix. If several such divs contain a match, the last one wins.
	 *
	 * @param doc parsed search result page
	 * @return the address of the next page, or "no" when there is none
	 */
	public String getNextPageUrl(Document doc) {
		// "no" signals that there is no further page.
		String result = "no";

		NodeList divs = doc.getElementsByTagName("div");
		for (int d = 0; d < divs.getLength(); d++) {
			Node candidate = divs.item(d);
			if (candidate.getNodeType() != Node.ELEMENT_NODE) {
				continue;
			}
			Element pager = (Element) candidate;
			// Only the paging toolbar <div id="divBottomPageNavi" ...> is of interest.
			boolean isPager = "divBottomPageNavi".equals(pager.getAttribute("id"));
			if (!isPager || !pager.hasChildNodes()) {
				continue;
			}

			NodeList links = pager.getFirstChild().getChildNodes();
			boolean found = false;
			int k = 0;
			while (!found && k < links.getLength()) {
				Node link = links.item(k);
				k++;
				if (link.getNodeType() != Node.ELEMENT_NODE) {
					continue;
				}
				Element anchor = (Element) link;
				// An element named "link_page_next" means another page exists.
				if ("link_page_next".equals(anchor.getAttribute("name"))) {
					result = "http://search.book.dangdang.com/" + anchor.getAttribute("href");
					found = true;
				}
			}
		}
		return result;
	}
	
	/**
	 * Reads the number of search results from a parsed search page.
	 *
	 * Within every div whose class is "l", the first child element named "SPAN"
	 * acts as a marker; the text of each element sibling after it is parsed as
	 * a number, and the last value that parses is kept. Siblings without text,
	 * with blank text, or with non-numeric text are ignored.
	 *
	 * @param doc parsed search result page
	 * @return the record count, or 0 when none could be read
	 */
	public long getRecordNum(Document doc) {
		long num = 0;
		NodeList divs = doc.getElementsByTagName("div");
		for (int i = 0; i < divs.getLength(); i++) {
			Element div = (Element) divs.item(i);
			if (!"l".equals(div.getAttribute("class"))) {
				continue;
			}

			boolean markerSeen = false;
			NodeList children = div.getChildNodes();
			for (int j = 0; j < children.getLength(); j++) {
				Node child = children.item(j);
				if (child.getNodeType() != Node.ELEMENT_NODE) {
					continue;
				}
				if (!markerSeen) {
					// The first SPAN only marks where the count starts; it is not parsed itself.
					markerSeen = "SPAN".equals(child.getNodeName());
					continue;
				}

				Node first = child.getFirstChild();
				String text = (first == null) ? null : first.getNodeValue();
				if (text == null) {
					// A nested element has no node value; previously a NullPointerException.
					continue;
				}
				// Trim before the emptiness check so blank text is skipped rather than parsed.
				text = text.trim();
				if (text.length() == 0) {
					continue;
				}
				try {
					num = Long.parseLong(text);
				} catch (NumberFormatException ignored) {
					// Every sibling after the marker is visited, so non-numeric ones
					// (links, labels) are expected; keep the value found so far.
				}
			}
		}
		return num;
	}
	/**
	 * Class: DangdangparserSec
	 * Description: requests a book's detail page by its URL and extracts the
	 * book's ISBN from it.
	 *
	 * @author  feng guang
	 * @since   11/09/08
	 */
	public class DangdangparserSec{
		/**
		 * Downloads the detail page at the given address, parses it as HTML and
		 * returns the ISBN found in it.
		 *
		 * @param url address of the book's detail page
		 * @return the ISBN text with spaces and the "ISBN:" label removed, or an
		 *         empty string when the page contains no ISBN entry
		 * @throws Exception propagated from URL handling, network I/O or the HTML parser
		 */
		public String getBookISBNSec(String url) throws Exception{
			/* Create the HTML parser and set the page's default encoding. */
			DOMParser parser = new DOMParser();
			parser.setProperty(
					"http://cyberneko.org/html/properties/default-encoding",
					"gb2312");

			/* Open the connection to the source site. */
			URLConnection urlconn = new URL(url).openConnection();
			urlconn.setReadTimeout(30000);

			/* Read the response as gb2312 text and parse it into a DOM document. */
			Document doc;
			BufferedReader in = new BufferedReader(new InputStreamReader(
					urlconn.getInputStream(), "gb2312"));
			try {
				parser.parse(new InputSource(in));
				doc = parser.getDocument();
			} finally {
				// Always release the stream; it was previously left open.
				in.close();
			}

			String compact = findIsbnText(doc).replace(" ", "");
			if (compact.length() == 0) {
				return "";
			}
			// Locate the label instead of assuming it sits at index 0, so leading
			// text such as a line break no longer shifts the cut position.
			int labelIdx = compact.indexOf("ISBN:");
			if (labelIdx != -1) {
				return compact.substring(labelIdx + 5).trim();
			}
			// Fallback: drop the first five characters, as the original code did.
			return compact.length() >= 5 ? compact.substring(5) : compact;
		}

		/**
		 * Returns the raw text of the ISBN list item inside the first
		 * &lt;ul class="nostyle"&gt; of the document, or an empty string when that
		 * list is missing or has no ISBN entry. Later lists are not inspected.
		 */
		private String findIsbnText(Document doc) {
			NodeList lists = doc.getElementsByTagName("ul");
			for (int i = 0; i < lists.getLength(); i++) {
				Element ul = (Element) lists.item(i);
				if (!"nostyle".equals(ul.getAttribute("class"))) {
					continue;
				}
				NodeList items = ul.getChildNodes();
				for (int j = 0; j < items.getLength(); j++) {
					Node item = items.item(j);
					if (item.getNodeType() != Node.ELEMENT_NODE || !item.hasChildNodes()) {
						continue;
					}
					String text = item.getFirstChild().getNodeValue();
					if (text != null && text.indexOf("I S B N    :") != -1) {
						return text;
					}
				}
				// Only the first matching list is searched.
				return "";
			}
			return "";
		}

	}
	/**
	 * Placeholder for extracting an ISBN from a listing element.
	 *
	 * This implementation does not inspect the element and always returns null.
	 * NOTE(review): presumably the ISBN is obtained from the detail page via
	 * DangdangparserSec.getBookISBNSec instead — confirm against the callers.
	 *
	 * @param bookElement listing element (unused)
	 * @return always null
	 */
	public String getBookISBN(Element bookElement) {

		// Not implemented: no ISBN is read from the listing element.
		return null;
	}
	/**
	 * Placeholder for extracting price details from a parsed page.
	 *
	 * This implementation does not inspect the document and always returns null.
	 * NOTE(review): looks like a stub kept to satisfy a shared parser contract —
	 * confirm whether any caller relies on a non-null result.
	 *
	 * @param doc parsed page (unused)
	 * @return always null
	 */
	public Price getDetailInfo(Document doc) {

		// Not implemented: no detail information is extracted.
		return null;
	}
	/**
	 * Manual smoke test: runs one fixed search against the live site, prints the
	 * fields of every book returned, then prints the elapsed time in milliseconds.
	 *
	 * @param args command-line arguments (unused)
	 * @throws Exception propagated from fetching or parsing the search page
	 */
	public static void main(String args[])throws Exception{
		Dangdangparser dangDang = new Dangdangparser();
		long beginTime = System.currentTimeMillis();
		Document doc = dangDang.nekohtmlParser("http://search.book.dangdang.com/search.aspx?key=java%B1%E0%B3%CC%CB%BC%CF%EB");
		ArrayList<Book> list = dangDang.mainService(doc, true);
		// Typed for-each replaces the raw Iterator and the cast it required.
		for (Book temp : list) {
			System.out.println(temp.getBookName() + ">>" + temp.getBookISBN()
					+ ">>" + temp.getBookAuthor() + ">>" + temp.getBookFixPrice() + ">>" + temp.getBookImage()
					+ ">>" + temp.getBookPublisher() + ">>" + temp.getBookPublishTime() + ">>" + temp.getBookProspectus()
					+ ">>" + temp.getPrice().getDangdangDiscount() + ">>" + temp.getPrice().getDangdangPrice()
					+ ">>" + temp.getPrice().getDangdangUrl());
		}
		System.out.println(System.currentTimeMillis() - beginTime);
	}


}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -