⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 weilanparser.java

📁 本系统实现了从五个网站上搜索的图书进行整合后
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
	 * @return String
	 */
	public String getBookDiscount(Element bookElement) {
		// Purpose: read the discount text of a book entry and return it as a
		// decimal-fraction string, e.g. "折扣:75折" -> "0.75".
		// Lookup: the first child element with class "priceinfo", then inside it
		// the first child element with class "discount" whose first child has a
		// non-null node value.
		// Returns "" when no such text exists or nothing remains after the
		// currency sign and labels are stripped.
		String rawDiscount = "";
		NodeList children = bookElement.getChildNodes();
		for (int i = 0; i < children.getLength(); i++) {
			Node child = children.item(i);
			if (child.getNodeType() != Node.ELEMENT_NODE
					|| !"priceinfo".equals(((Element) child).getAttribute("class"))) {
				continue;
			}
			NodeList priceNodes = child.getChildNodes();
			for (int j = 0; j < priceNodes.getLength(); j++) {
				Node priceNode = priceNodes.item(j);
				if (priceNode.getNodeType() != Node.ELEMENT_NODE
						|| !"discount".equals(((Element) priceNode).getAttribute("class"))) {
					continue;
				}
				Node firstChild = priceNode.getFirstChild();
				if (firstChild != null && firstChild.getNodeValue() != null) {
					rawDiscount = firstChild.getNodeValue().trim();
					break;
				}
			}
			// Only the first "priceinfo" child is inspected.
			break;
		}

		String digits = rawDiscount.replace("¥", "").trim();
		digits = digits.replace("折扣:", "").trim();
		digits = digits.replace("折", "").trim();
		// A value written with a decimal point ("7.5") must not end up as "0.7.5".
		digits = digits.replace(".", "");
		if (digits.length() == 0) {
			// Nothing left to convert: report "no discount" instead of a bare "0.".
			return "";
		}
		return "0." + digits;
	}
					    	
	/**Function:getBookImage
	 * Description: returns the image address of a book entry.
	 * Only the first child named "A" is examined, and inside it only the first
	 * child element named "IMG"; the trimmed value of its "src" attribute is
	 * returned. An empty string is returned when any of these is missing.
	 * Call:no
	 * Called by:mainService
	 * @param bookElement element whose direct children are searched
	 * @return trimmed "src" value, or "" when none is found
	 */
	public String getBookImage(Element bookElement) {
		NodeList children = bookElement.getChildNodes();
		for (int idx = 0; idx < children.getLength(); idx++) {
			Node child = children.item(idx);
			if (!"A".equals(child.getNodeName())) {
				continue;
			}
			// First anchor found: whatever happens below, no later anchor is tried.
			NodeList anchorChildren = ((Element) child).getChildNodes();
			for (int k = 0; k < anchorChildren.getLength(); k++) {
				Node candidate = anchorChildren.item(k);
				boolean isImage = candidate.getNodeType() == Node.ELEMENT_NODE
						&& "IMG".equals(candidate.getNodeName());
				if (isImage) {
					Element image = (Element) candidate;
					return image.hasAttribute("src") ? image.getAttribute("src").trim() : "";
				}
			}
			return "";
		}
		return "";
	}
	/**Function:getBookName
	 * Description: returns the name of a book entry.
	 * The first child named "A" supplies the text: for each of its children,
	 * an element contributes the node value of its own first child, any other
	 * node contributes its own node value. Null values are skipped and every
	 * contribution is trimmed before being appended.
	 * Call:no
	 * Called by:mainService
	 * @param bookElement element whose direct children are searched
	 * @return concatenated name text, or "" when there is no "A" child
	 */
	public String getBookName(Element bookElement) {
		StringBuilder name = new StringBuilder();
		NodeList entries = bookElement.getChildNodes();
		for (int idx = 0; idx < entries.getLength(); idx++) {
			Node entry = entries.item(idx);
			if (!"A".equals(entry.getNodeName())) {
				continue;
			}
			for (Node part = entry.getFirstChild(); part != null; part = part.getNextSibling()) {
				// Elements are read one level down; everything else is read directly.
				Node textSource = part.getNodeType() == Node.ELEMENT_NODE ? part.getFirstChild() : part;
				if (textSource != null && textSource.getNodeValue() != null) {
					name.append(textSource.getNodeValue().trim());
				}
			}
			// Later anchors are ignored.
			break;
		}
		return name.toString();
	}
	/**Function:getBookFixPrice
	 * Description: returns the list (market) price text of a book entry.
	 * Looks at the first child element with class "priceinfo" and, inside it,
	 * at the first child element with class "oprice"; the node value of that
	 * element's first child is returned with the "¥" sign removed and
	 * surrounding whitespace trimmed.
	 * Call:no
	 * Called by:mainService
	 * @param bookElement element whose direct children are searched
	 * @return the price text, or "" when the element or its text is missing
	 */
	public String getBookFixPrice(Element bookElement) {
		String fixPrice = "";
		NodeList children = bookElement.getChildNodes();
		for (int i = 0; i < children.getLength(); i++) {
			Node child = children.item(i);
			if (child.getNodeType() != Node.ELEMENT_NODE
					|| !"priceinfo".equals(((Element) child).getAttribute("class"))) {
				continue;
			}
			NodeList priceNodes = child.getChildNodes();
			for (int j = 0; j < priceNodes.getLength(); j++) {
				Node priceNode = priceNodes.item(j);
				if (priceNode.getNodeType() != Node.ELEMENT_NODE
						|| !"oprice".equals(((Element) priceNode).getAttribute("class"))) {
					continue;
				}
				Node firstChild = priceNode.getFirstChild();
				// The first child may be an element, whose node value is null;
				// keep the "" default then instead of failing later on null.
				if (firstChild != null && firstChild.getNodeValue() != null) {
					fixPrice = firstChild.getNodeValue();
				}
				break;
			}
			// Only the first "priceinfo" child is inspected.
			break;
		}
		// Strip the currency sign first so whitespace that followed it is trimmed too.
		return fixPrice.replace("¥", "").trim();
	}
	/**Function: getBookPrice
	 * Description: returns the Weilan site price text of a book entry.
	 * Looks at the first child element with class "priceinfo" and, inside it,
	 * at the first child element with class "sprice"; the node value of that
	 * element's first child is returned after removing "¥" and the "节省:"
	 * label and trimming whitespace.
	 * Call:no
	 * Called by:mainService
	 * @param bookElement element whose direct children are searched
	 * @return the price text, or "" when the element or its text is missing
	 */
	public String getBookPrice(Element bookElement) {
		String price = "";
		NodeList children = bookElement.getChildNodes();
		for (int i = 0; i < children.getLength(); i++) {
			Node child = children.item(i);
			if (child.getNodeType() != Node.ELEMENT_NODE
					|| !"priceinfo".equals(((Element) child).getAttribute("class"))) {
				continue;
			}
			NodeList priceNodes = child.getChildNodes();
			for (int j = 0; j < priceNodes.getLength(); j++) {
				Node priceNode = priceNodes.item(j);
				if (priceNode.getNodeType() != Node.ELEMENT_NODE
						|| !"sprice".equals(((Element) priceNode).getAttribute("class"))) {
					continue;
				}
				Node firstChild = priceNode.getFirstChild();
				// The first child may be an element, whose node value is null;
				// keep the "" default then instead of failing later on null.
				if (firstChild != null && firstChild.getNodeValue() != null) {
					price = firstChild.getNodeValue();
				}
				break;
			}
			// Only the first "priceinfo" child is inspected.
			break;
		}
		return price.replace("¥", "").replace("节省:", "").trim();
	}
	/**Function:getBookPublishTime
	 * Description: returns the publication date text of a book entry.
	 * Every direct child whose node value contains the label "出版日期:" is
	 * considered; the value is trimmed and everything after the label is
	 * taken. When several children match, the last one wins.
	 * Call:no
	 * Called by:mainService
	 * @param bookElement element whose direct children are searched
	 * @return the text following the label, or null when no child carries it
	 */
	public String getBookPublishTime(Element bookElement) {
		final String label = "出版日期:";
		String publishTime = null;

		for (Node child = bookElement.getFirstChild(); child != null; child = child.getNextSibling()) {
			String value = child.getNodeValue();
			if (value == null || value.length() == 0 || value.indexOf(label) < 0) {
				continue;
			}
			String trimmed = value.trim();
			// No early exit: a later matching node overrides an earlier one.
			publishTime = trimmed.substring(trimmed.indexOf(label) + label.length());
		}
		return publishTime;
	}
	/**Function:getBookPublisher
	 * Description: returns the publisher name of a book entry.
	 * Scans the direct children for a node whose value contains "出版社" and
	 * then looks at the node immediately after it. If that node is named "A",
	 * the trimmed node value of its first child is the publisher and the scan
	 * stops. Otherwise the scan resumes after that following node.
	 * Call:no
	 * Called by:mainService
	 * @param bookElement element whose direct children are searched
	 * @return the publisher text, or "" when none is found
	 */
	public String getBookPublisher(Element bookElement) {
		String publisher = "";
		NodeList nodes = bookElement.getChildNodes();

		int cursor = 0;
		while (cursor < nodes.getLength()) {
			String text = nodes.item(cursor).getNodeValue();
			cursor++;
			boolean isLabel = text != null && text.length() != 0 && text.indexOf("出版社") != -1;
			if (!isLabel || cursor >= nodes.getLength()) {
				continue;
			}
			// The node right after the label is consumed whether or not it is
			// an anchor, so it is never itself tested as a label.
			Node following = nodes.item(cursor);
			cursor++;
			if (!"A".equals(following.getNodeName())) {
				continue;
			}
			Node firstChild = following.getFirstChild();
			if (firstChild != null && firstChild.getNodeValue() != null) {
				publisher = firstChild.getNodeValue().trim();
			}
			break;
		}
		return publisher;
	}
	/**Function:getBookContent
	 * Description: returns the synopsis text of a book.
	 * Concatenates, in document order, the text of the direct children: an
	 * element contributes the node value of its own first child, any other
	 * node contributes its own node value. Missing (null) values are skipped
	 * so that the literal text "null" never leaks into the result. Carriage
	 * returns, line feeds and space characters are then removed.
	 * Call:no
	 * Called by:mainService
	 * @param bookElement element whose direct children are read
	 * @return the collected text without "\r", "\n" and " "; "" when empty
	 */
	public String  getBookContent(Element bookElement) {
		StringBuilder content = new StringBuilder();
		NodeList nodes = bookElement.getChildNodes();
		for (int i = 0; i < nodes.getLength(); i++) {
			Node node = nodes.item(i);
			// Elements are read one level down; everything else is read directly.
			Node textSource = node.getNodeType() == Node.ELEMENT_NODE ? node.getFirstChild() : node;
			String value = textSource == null ? null : textSource.getNodeValue();
			if (value != null) {
				content.append(value);
			}
		}
		return content.toString()
				.replace("\r", "")
				.replace("\n", "")
				.replace(" ", "");
	}
	
	/**Function:getBookUrl
	 * Description: returns the detail-page link of a book entry.
	 * Only the first direct child named "A" is examined; its "href" attribute
	 * value is returned unchanged.
	 * Call:no
	 * Called by:mainService
	 * @param bookElement element whose direct children are searched
	 * @return the "href" value, or null when there is no "A" child or the
	 *         first one has no "href" attribute
	 */
	public String getBookUrl(Element bookElement) {
		for (Node child = bookElement.getFirstChild(); child != null; child = child.getNextSibling()) {
			if ("A".equals(child.getNodeName())) {
				Element anchor = (Element) child;
				// Later anchors are never consulted, even if this one has no href.
				return anchor.hasAttribute("href") ? anchor.getAttribute("href") : null;
			}
		}
		return null;
	}
	 public class WeiLanParserSec {

		/**
		 * Reads the page at the given address, parses it as HTML and extracts
		 * the ISBN text from it.
		 * <p>
		 * The first DIV with class "product_chanshu" is searched: for each child
		 * element whose first child's node value contains "ISBN", the node that
		 * immediately follows is inspected, and if it is an element the node
		 * value of its first child is taken as the ISBN.
		 * <p>
		 * Only a read timeout (30 s) is configured on the connection; no connect
		 * timeout is set.
		 *
		 * @param url address of the page to read
		 * @return the ISBN text as found in the page (not trimmed), or "" when absent
		 * @throws IOException  if the connection or the stream fails
		 * @throws SAXException if the parser rejects the input
		 */
		public String getBookISBNSec(String url) throws IOException, SAXException {
			String bookISBN = "";

			// HTML parser; UTF-8 is its default encoding.
			DOMParser parser = new DOMParser();
			parser.setProperty(
					"http://cyberneko.org/html/properties/default-encoding",
					"UTF-8");

			URLConnection urlConnection = new URL(url).openConnection();
			urlConnection.setReadTimeout(30000);

			// The byte stream is decoded as UTF-8 before it reaches the parser.
			BufferedReader reader = new BufferedReader(new InputStreamReader(
					urlConnection.getInputStream(), "UTF-8"));
			Document doc;
			try {
				parser.parse(new InputSource(reader));
				doc = parser.getDocument();
			} finally {
				// Always release the network stream, even when parsing fails.
				reader.close();
			}
			if (doc == null) {
				return bookISBN;
			}

			NodeList divNodeList = doc.getElementsByTagName("DIV");
			for (int i = 0; i < divNodeList.getLength(); i++) {
				Node divNode = divNodeList.item(i);
				if (divNode.getNodeType() != Node.ELEMENT_NODE
						|| !"product_chanshu".equals(((Element) divNode).getAttribute("class"))) {
					continue;
				}
				NodeList detailNodes = divNode.getChildNodes();
				for (int j = 0; j < detailNodes.getLength(); j++) {
					Node labelNode = detailNodes.item(j);
					if (labelNode.getNodeType() != Node.ELEMENT_NODE) {
						continue;
					}
					Node labelText = labelNode.getFirstChild();
					if (labelText == null || labelText.getNodeValue() == null
							|| labelText.getNodeValue().indexOf("ISBN") == -1) {
						continue;
					}
					// Step onto the node after the label; it is consumed whether
					// or not it turns out to hold the value.
					j++;
					if (j < detailNodes.getLength()) {
						Node isbnNode = detailNodes.item(j);
						if (isbnNode.getNodeType() == Node.ELEMENT_NODE) {
							Node isbnText = isbnNode.getFirstChild();
							// Keep the "" default rather than returning null.
							if (isbnText != null && isbnText.getNodeValue() != null) {
								bookISBN = isbnText.getNodeValue();
							}
							break;
						}
					}
				}
				// Only the first matching DIV is inspected.
				break;
			}
			return bookISBN;
		}
	 }
	/**
	 * Manual smoke test: passes one search-result address to
	 * {@code nekohtmlParser}, hands the resulting document to
	 * {@code mainService}, prints the fields of every returned book on one
	 * line each, and finally prints the value of {@code getNextPageUrl}.
	 *
	 * @param args not used
	 * @throws Exception whatever the parser calls throw
	 */
	public static void main(String[] args) throws Exception{
		WeilanParser wp = new WeilanParser();
		// Start timestamp for the optional elapsed-time printout further down.
		long beginTime = System.currentTimeMillis();
		Document doc = wp.nekohtmlParser(
				"http://search.wl.cn/search.aspx?q=java&producttype=0&index=1");
		// Optional debugging of the detail lookup:
		//Price price = wp.getDetailInfo(doc);
		//System.out.println(price.getWeilanDiscount() + ">>" + price.getWeilanPrice() + price.getWeilanUrl());
		ArrayList<Book> list = wp.mainService(doc,true);
		for (Book book : list) {
			String line = ">>" + book.getBookISBN()
					+ ">>" + book.getBookAuthor()
					+ ">>" + book.getBookPublishTime()
					+ ">>" + book.getBookPublisher()
					+ ">>" + book.getPrice().getWeilanPrice()
					+ ">>" + book.getPrice().getWeilanDiscount()
					+ ">>" + book.getPrice().getWeilanUrl()
					+ ">>" + book.getBookProspectus()
					+ ">>" + book.getBookFixPrice()
					+ ">>" + book.getBookImage()
					+ ">>" + book.getBookName();
			System.out.println(line);
		}
//		System.out.println(System.currentTimeMillis() - beginTime);
		System.out.println(wp.getNextPageUrl(doc));
//		System.out.println(wp.getRecordNum(doc));
	}

	/**
	 * Placeholder that always returns {@code null}; the argument is not inspected.
	 * NOTE(review): presumably the ISBN is obtained from the detail page via
	 * WeiLanParserSec.getBookISBNSec instead — confirm against the callers.
	 *
	 * @param bookElement ignored
	 * @return always {@code null}
	 */
	public String getBookISBN(Element bookElement) {
		return null;
	}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -