⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 tsinghuaparser.java

📁 本系统实现了从五个网站上搜索的图书进行整合后
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
	public String getBookAuthor(Element bookElement) {
		// Reads the node value of the element's first child and normalizes it
		// into a space-separated author list. Returns "" when the element has
		// no children or the first child carries no node value.
		final int maxAuthorLength = 64;

		String bookAuthor = "";
		if (bookElement.hasChildNodes()) {
			bookAuthor = bookElement.getFirstChild().getNodeValue();
		}
		// getNodeValue() is null for non-text nodes such as a nested element;
		// the previous code null-checked once and then dereferenced anyway.
		if (bookAuthor == null) {
			return "";
		}

		bookAuthor = bookAuthor.replace(" ", "");

		// Keep at most maxAuthorLength characters (the old substring(0, 63)
		// dropped one character more than its own length check implied).
		if (bookAuthor.length() > maxAuthorLength) {
			bookAuthor = bookAuthor.substring(0, maxAuthorLength);
		}

		// Unify list separators to a single space. The source listed "," twice;
		// the second entry is taken to be the full-width comma (U+FF0C), and the
		// full-width semicolon (U+FF1B) is handled alongside its ASCII twin.
		bookAuthor = bookAuthor.replace(",", " ");
		bookAuthor = bookAuthor.replace("\uFF0C", " ");
		bookAuthor = bookAuthor.replace(";", " ");
		bookAuthor = bookAuthor.replace("\uFF1B", " ");
		bookAuthor = bookAuthor.replace("、", " ");
		// Drop the "et al." marker.
		bookAuthor = bookAuthor.replace("等", "");
		return bookAuthor;

	}

	/**
	 * Gets the book's publication time.
	 *
	 * <p>Reads the node value of the first child of {@code bookElement} and
	 * removes every ASCII space from it. Called by mainService.
	 *
	 * @param bookElement element whose first child holds the publication-time text
	 * @return the text without spaces, or an empty string when the element has
	 *         no children or its first child has no node value
	 */
	public String getBookPublishTime(Element bookElement) {
		if (!bookElement.hasChildNodes()) {
			return "";
		}
		String bookPublishTime = bookElement.getFirstChild().getNodeValue();
		// getNodeValue() is null when the first child is not a text-like node
		// (e.g. a nested element); fall back to "" instead of throwing.
		if (bookPublishTime == null) {
			return "";
		}
		return bookPublishTime.replace(" ", "");
	}

	/**
	 * Gets the book's price text.
	 *
	 * <p>Reads the node value of the first child of {@code bookElement} and
	 * removes every ASCII space from it. Called by mainService.
	 *
	 * @param bookElement element whose first child holds the price text
	 * @return the text without spaces, or an empty string when the element has
	 *         no children or its first child has no node value
	 */
	public String getBookPrice(Element bookElement) {
		if (!bookElement.hasChildNodes()) {
			return "";
		}
		String bookPrice = bookElement.getFirstChild().getNodeValue();
		// getNodeValue() is null when the first child is not a text-like node
		// (e.g. a nested element); fall back to "" instead of throwing.
		if (bookPrice == null) {
			return "";
		}
		return bookPrice.replace(" ", "");
	}
	/**
	 * Gets the book's ISBN text.
	 *
	 * <p>Reads the node value of the first child of {@code bookElement} and
	 * removes every ASCII space from it. Called by mainService.
	 *
	 * @param bookElement element whose first child holds the ISBN text
	 * @return the text without spaces, or an empty string when the element has
	 *         no children or its first child has no node value
	 */
	public String getBookISBN(Element bookElement) {
		if (!bookElement.hasChildNodes()) {
			return "";
		}
		String bookISBN = bookElement.getFirstChild().getNodeValue();
		// getNodeValue() is null when the first child is not a text-like node
		// (e.g. a nested element); fall back to "" instead of throwing.
		if (bookISBN == null) {
			return "";
		}
		return bookISBN.replace(" ", "");
	}
	/**
	 * Gets the URL of the book's detail page.
	 *
	 * <p>The first child of {@code bookElement} must be an element carrying an
	 * {@code href} attribute; that value is appended to the fixed
	 * {@code http://www.tup.tsinghua.edu.cn/book/} prefix and all ASCII spaces
	 * are removed from the result. Called by mainService.
	 *
	 * @param bookElement element whose first child is expected to be the link
	 * @return the detail-page URL, or an empty string when there is no first
	 *         child, it is not an element, or it has no {@code href} attribute
	 */
	public String getBookUrl(Element bookElement) {
		if (!bookElement.hasChildNodes()) {
			return "";
		}

		Node candidate = bookElement.getFirstChild();
		if (candidate.getNodeType() != Node.ELEMENT_NODE) {
			return "";
		}

		Element link = (Element) candidate;
		if (!link.hasAttribute("href")) {
			return "";
		}

		String detailUrl = "http://www.tup.tsinghua.edu.cn/book/" + link.getAttribute("href");
		return detailUrl.replace(" ", "");
	}

	/**
	 * Placeholder for the book's detail message: not implemented here.
	 *
	 * @param bookElement ignored
	 * @return always {@code null}
	 */
	public String getBookDetailMesg(Element bookElement) {
		return null;
	}
	/**
	 * Placeholder for the book's content text: not implemented here.
	 *
	 * @param bookElement ignored
	 * @return always {@code null}
	 */
	public String getBookContent(Element bookElement) {		
		return null;
	}
	/**
	 * Placeholder for the book's image: not implemented here.
	 *
	 * @param bookElement ignored
	 * @return always {@code null}
	 */
	public String getBookImage(Element bookElement) {
		return null;
	}
	/**
	 * Placeholder for the book's publisher: not implemented here.
	 *
	 * @param bookElement ignored
	 * @return always {@code null}
	 */
	public String getBookPublisher(Element bookElement) {
		return null;
	}
	/**
	 * Placeholder for the book's discount: not implemented here.
	 *
	 * @param bookElement ignored
	 * @return always {@code null}
	 */
	public String getBookDiscount(Element bookElement) {
		return null;
	}
	/**
	 * Placeholder for the book's fixed (list) price: not implemented here.
	 *
	 * @param bookElement ignored
	 * @return always {@code null}
	 */
	public String getBookFixPrice(Element bookElement) {	
		return null;
	}

	/**
	 * Finds the URL of the next result page in a parsed search-result document.
	 *
	 * <p>Looks for a pager link (an {@code A} element wrapping an {@code IMG}
	 * whose {@code alt} is "显示下一页") and prefixes its {@code href} with
	 * {@code http://www.tup.tsinghua.edu.cn}. If several links match, the last
	 * one in document order is used.
	 *
	 * @param doc parsed result page
	 * @return the absolute next-page URL, or the literal string {@code "no"}
	 *         when no such link is found
	 */
	public String getNextPageUrl(Document doc) {
		java.util.List<String> hrefs = collectPagerLinkHrefs(doc, "显示下一页");
		if (hrefs.isEmpty()) {
			return "no";
		}
		return "http://www.tup.tsinghua.edu.cn" + hrefs.get(hrefs.size() - 1);
	}

	/**
	 * Derives the record count from the "last page" pager link.
	 *
	 * <p>Looks for a pager link whose image {@code alt} is "显示最后一页",
	 * reads the number following {@code page=} in its {@code href} and returns
	 * {@code (lastPage - 1) * 20}. Only the leading digits after
	 * {@code page=} are parsed, so further query parameters after the page
	 * number no longer cause a {@link NumberFormatException}.
	 *
	 * @param doc parsed result page
	 * @return {@code (lastPage - 1) * 20}, or 0 when no usable link is found
	 */
	public long getRecordNum(Document doc) {
		final String pageKey = "page=";
		long num = 0;

		for (String href : collectPagerLinkHrefs(doc, "显示最后一页")) {
			int keyAt = href.indexOf(pageKey);
			if (keyAt == -1) {
				continue;
			}
			int start = keyAt + pageKey.length();
			int end = start;
			while (end < href.length() && href.charAt(end) >= '0' && href.charAt(end) <= '9') {
				end++;
			}
			if (end == start) {
				// "page=" is not followed by a number; keep the previous value.
				continue;
			}
			try {
				// Computed in long so a large page number cannot overflow int.
				num = (Long.parseLong(href.substring(start, end)) - 1) * 20;
			} catch (NumberFormatException ignored) {
				// Digit run too long for a long; treat as unusable and keep the previous value.
			}
		}
		return num;
	}

	/**
	 * Collects the {@code href} of every pager link whose image has the given
	 * {@code alt} text, in document order.
	 *
	 * <p>Only tables with attribute {@code top="40"} are examined, and within
	 * each only the first {@code TR} child: the path followed is
	 * TR &gt; TD &gt; A &gt; IMG, using direct children at every step.
	 */
	private static java.util.List<String> collectPagerLinkHrefs(Document doc, String imgAlt) {
		java.util.List<String> hrefs = new java.util.ArrayList<String>();
		NodeList tables = doc.getElementsByTagName("table");

		for (int i = 0; i < tables.getLength(); i++) {
			Node tableNode = tables.item(i);
			if (Node.ELEMENT_NODE != tableNode.getNodeType()
					|| !"40".equals(((Element) tableNode).getAttribute("top"))) {
				continue;
			}
			java.util.List<Element> rows = childElementsNamed(tableNode, "TR");
			if (rows.isEmpty()) {
				continue;
			}
			// Only the first row of the pager table is inspected.
			for (Element cell : childElementsNamed(rows.get(0), "TD")) {
				for (Element anchor : childElementsNamed(cell, "A")) {
					for (Element image : childElementsNamed(anchor, "IMG")) {
						if (imgAlt.equals(image.getAttribute("alt"))) {
							hrefs.add(anchor.getAttribute("href"));
							break; // one hit per anchor is enough
						}
					}
				}
			}
		}
		return hrefs;
	}

	/** Returns the direct child elements of {@code parent} whose node name equals {@code name}. */
	private static java.util.List<Element> childElementsNamed(Node parent, String name) {
		java.util.List<Element> matches = new java.util.ArrayList<Element>();
		NodeList children = parent.getChildNodes();
		for (int i = 0; i < children.getLength(); i++) {
			Node child = children.item(i);
			if (Node.ELEMENT_NODE == child.getNodeType() && name.equals(child.getNodeName())) {
				matches.add((Element) child);
			}
		}
		return matches;
	}

	/* Manual test entry point: parses one search-result page and prints the
	 * next-page URL followed by the record count. */
	public static void main(String[] args) throws Exception {
		Tsinghuaparser parser = new Tsinghuaparser();
		Document searchPage = parser.nekohtmlParser("http://www.tup.com.cn/book/search.asp?keyword=java");

		String nextPageUrl = parser.getNextPageUrl(searchPage);
		System.out.println(nextPageUrl);

		long recordCount = parser.getRecordNum(searchPage);
		System.out.println(recordCount);
	}



}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -