📄 china_pubparser.java
字号:
}
}
return price;
}
/**
 * Extracts the book detail-page URL from a raw {@code key:'value'} fragment.
 *
 * <p>The value is the text between the first {@code :'} marker and the last
 * character of {@code temStr} (the last character is dropped; presumably it is
 * the closing quote - TODO confirm against the caller). The value is appended
 * to {@code http://www.china-pub.com/}.
 *
 * <p>Called by {@code mainService} according to the original header comment.
 *
 * @param temStr raw fragment to parse; may be {@code null}
 * @return the absolute detail-page URL, or an empty string when
 *         {@code temStr} is {@code null}, is 3 characters or shorter, has no
 *         {@code :'} marker, or ends with the marker
 */
public String getBookUrl(String temStr) {
    if (temStr == null || temStr.length() <= 3) {
        return "";
    }
    int marker = temStr.indexOf(":'");
    if (marker == -1) {
        return "";
    }
    int valueStart = marker + 2;
    int valueEnd = temStr.length() - 1;
    // A marker at the very end (e.g. "ab:'") puts valueStart past valueEnd;
    // substring would throw StringIndexOutOfBoundsException in that case.
    if (valueStart > valueEnd) {
        return "";
    }
    return "http://www.china-pub.com/" + temStr.substring(valueStart, valueEnd);
}
/**
 * Extracts the book discount from a raw {@code key:'value'} fragment.
 *
 * <p>The value is the text between the first {@code :'} marker and the last
 * character of {@code temStr} (the last character is dropped; presumably it is
 * the closing quote - TODO confirm against the caller).
 *
 * <p>Called by {@code mainService} according to the original header comment.
 *
 * @param temStr raw fragment to parse; may be {@code null}
 * @return the discount text, or an empty string when {@code temStr} is
 *         {@code null}, is 3 characters or shorter, has no {@code :'} marker,
 *         or ends with the marker
 */
public String getBookDiscount(String temStr) {
    if (temStr == null || temStr.length() <= 3) {
        return "";
    }
    int marker = temStr.indexOf(":'");
    int valueStart = marker + 2;
    int valueEnd = temStr.length() - 1;
    // valueStart > valueEnd happens when the marker is the last two characters;
    // substring would throw there, so treat it as "no value".
    if (marker == -1 || valueStart > valueEnd) {
        return "";
    }
    return temStr.substring(valueStart, valueEnd);
}
/**
 * Extracts the book author list from a raw {@code key:'value'} fragment and
 * normalises its separators.
 *
 * <p>The value is the text between the first {@code :'} marker and the last
 * character of {@code temStr} (the last character is dropped; presumably it is
 * the closing quote - TODO confirm against the caller). All spaces are removed
 * from the value, then every {@code ,} and {@code ;} is replaced by a single
 * space.
 *
 * <p>Called by {@code mainService} according to the original header comment.
 *
 * @param temStr raw fragment to parse; may be {@code null}
 * @return the normalised author text, or an empty string when {@code temStr}
 *         is {@code null}, is 3 characters or shorter, has no {@code :'}
 *         marker, or ends with the marker
 */
public String getBookAuthor(String temStr) {
    if (temStr == null || temStr.length() <= 3) {
        return "";
    }
    int marker = temStr.indexOf(":'");
    int valueStart = marker + 2;
    int valueEnd = temStr.length() - 1;
    // valueStart > valueEnd happens when the marker is the last two characters;
    // substring would throw there, so treat it as "no value".
    if (marker == -1 || valueStart > valueEnd) {
        return "";
    }
    String bookAuthor = temStr.substring(valueStart, valueEnd);
    // Order matters: strip the spaces inside names first, then turn the
    // separators into the only spaces left in the result.
    // NOTE(review): the previous code replaced "," twice with identical
    // arguments; one of the two was possibly meant for a different separator
    // character - confirm before adding more separators here.
    return bookAuthor.replace(" ", "").replace(",", " ").replace(";", " ");
}
/**
 * Builds the book cover image URL from a raw {@code key:'value'} fragment.
 *
 * <p>The value is the text between the first {@code :'} marker and the last
 * character of {@code temStr} (the last character is dropped; presumably it is
 * the closing quote - TODO confirm against the caller). The result is
 * {@code http://images.china-pub.com} + value + {@code /cover.jpg}.
 *
 * <p>When no value can be extracted the value part is empty, so the result is
 * {@code http://images.china-pub.com/cover.jpg}.
 * NOTE(review): the sibling extractors return an empty string in that
 * situation; confirm whether callers rely on this fallback URL.
 *
 * <p>Called by {@code mainService} according to the original header comment.
 *
 * @param temStr raw fragment to parse; may be {@code null}
 * @return the cover image URL; never {@code null}
 */
public String getBookImage(String temStr) {
    String imagePath = "";
    if (temStr != null && temStr.length() > 3) {
        int marker = temStr.indexOf(":'");
        int valueStart = marker + 2;
        int valueEnd = temStr.length() - 1;
        // Skip the extraction when the marker is the last two characters:
        // substring(valueStart, valueEnd) would throw with valueStart > valueEnd.
        if (marker != -1 && valueStart <= valueEnd) {
            imagePath = temStr.substring(valueStart, valueEnd);
        }
    }
    return "http://images.china-pub.com" + imagePath + "/cover.jpg";
}
/**
 * Extracts the book name (title) from a raw {@code key:'value'} fragment.
 *
 * <p>The value is the text between the first {@code :'} marker and the last
 * character of {@code temStr} (the last character is dropped; presumably it is
 * the closing quote - TODO confirm against the caller).
 *
 * @param temStr raw fragment to parse; may be {@code null}
 * @return the book name, or an empty string when {@code temStr} is
 *         {@code null}, is 3 characters or shorter, has no {@code :'} marker,
 *         or ends with the marker
 */
public String getBookName(String temStr) {
    if (temStr == null || temStr.length() <= 3) {
        return "";
    }
    int marker = temStr.indexOf(":'");
    int valueStart = marker + 2;
    int valueEnd = temStr.length() - 1;
    // valueStart > valueEnd happens when the marker is the last two characters;
    // substring would throw there, so treat it as "no value".
    if (marker == -1 || valueStart > valueEnd) {
        return "";
    }
    return temStr.substring(valueStart, valueEnd);
}
/**
 * Extracts the book list (market) price from a raw {@code key:'value'}
 * fragment.
 *
 * <p>The value is the text between the first {@code :'} marker and the last
 * character of {@code temStr} (the last character is dropped; presumably it is
 * the closing quote - TODO confirm against the caller). The text is returned
 * as-is; no numeric parsing is done here.
 *
 * <p>Called by {@code mainService} according to the original header comment.
 *
 * @param temStr raw fragment to parse; may be {@code null}
 * @return the price text, or an empty string when {@code temStr} is
 *         {@code null}, is 3 characters or shorter, has no {@code :'} marker,
 *         or ends with the marker
 */
public String getBookFixPrice(String temStr) {
    if (temStr == null || temStr.length() <= 3) {
        return "";
    }
    int marker = temStr.indexOf(":'");
    int valueStart = marker + 2;
    int valueEnd = temStr.length() - 1;
    // valueStart > valueEnd happens when the marker is the last two characters;
    // substring would throw there, so treat it as "no value".
    if (marker == -1 || valueStart > valueEnd) {
        return "";
    }
    return temStr.substring(valueStart, valueEnd);
}
/**
 * Extracts the book publication time from a raw {@code key:'value'} fragment.
 *
 * <p>The value is the text between the first {@code :'} marker and the
 * position 8 characters before the end of {@code temStr}; the trailing
 * 8 characters are dropped (presumably a time-of-day suffix plus the closing
 * quote - TODO confirm against the caller).
 *
 * <p>Unlike the sibling extractors, this method reports "no value" as
 * {@code null} rather than an empty string.
 *
 * <p>Called by {@code mainService} according to the original header comment.
 *
 * @param temStr raw fragment to parse; may be {@code null}
 * @return the publication time text, or {@code null} when {@code temStr} is
 *         {@code null}, is 10 characters or shorter, has no {@code :'} marker,
 *         or has fewer than 8 characters after the marker
 */
public String getBookPublishTime(String temStr) {
    if (temStr == null || temStr.length() <= 10) {
        return null;
    }
    int marker = temStr.indexOf(":'");
    if (marker == -1) {
        return null;
    }
    int valueStart = marker + 2;
    int valueEnd = temStr.length() - 8;
    // The length check alone does not guarantee room for the 8-character
    // suffix after the marker (e.g. "abcdefghi:'"); substring would throw.
    if (valueStart > valueEnd) {
        return null;
    }
    return temStr.substring(valueStart, valueEnd);
}
/**
 * Extracts the book ISBN from a raw {@code key:'value'} fragment.
 *
 * <p>The value is the text between the first {@code :'} marker and the last
 * character of {@code temStr} (the last character is dropped; presumably it is
 * the closing quote - TODO confirm against the caller). The text is not
 * validated as an ISBN.
 *
 * <p>Called by {@code mainService} according to the original header comment.
 *
 * @param temStr raw fragment to parse; may be {@code null}
 * @return the ISBN text, or an empty string when {@code temStr} is
 *         {@code null}, is 3 characters or shorter, has no {@code :'} marker,
 *         or ends with the marker
 */
public String getBookISBN(String temStr) {
    if (temStr == null || temStr.length() <= 3) {
        return "";
    }
    int marker = temStr.indexOf(":'");
    int valueStart = marker + 2;
    int valueEnd = temStr.length() - 1;
    // valueStart > valueEnd happens when the marker is the last two characters;
    // substring would throw there, so treat it as "no value".
    if (marker == -1 || valueStart > valueEnd) {
        return "";
    }
    return temStr.substring(valueStart, valueEnd);
}
/**
 * Extracts the book publisher from a raw {@code key:'value'} fragment.
 *
 * <p>The value is the text between the first {@code :'} marker and the last
 * character of {@code temStr} (the last character is dropped; presumably it is
 * the closing quote - TODO confirm against the caller).
 *
 * <p>Called by {@code mainService} according to the original header comment.
 *
 * @param temStr raw fragment to parse; may be {@code null}
 * @return the publisher text, or an empty string when {@code temStr} is
 *         {@code null}, is 3 characters or shorter, has no {@code :'} marker,
 *         or ends with the marker
 */
public String getBookPublisher(String temStr) {
    if (temStr == null || temStr.length() <= 3) {
        return "";
    }
    int marker = temStr.indexOf(":'");
    int valueStart = marker + 2;
    int valueEnd = temStr.length() - 1;
    // valueStart > valueEnd happens when the marker is the last two characters;
    // substring would throw there, so treat it as "no value".
    if (marker == -1 || valueStart > valueEnd) {
        return "";
    }
    return temStr.substring(valueStart, valueEnd);
}
/**
 * Placeholder for sale-price extraction; this parser does not implement it.
 *
 * <p>NOTE(review): presumably present to satisfy a shared parser contract -
 * confirm against the class declaration.
 *
 * @param bookElement ignored
 * @return always {@code null}
 */
public String getBookPrice(String bookElement) {
    // Stub: the argument is never read.
    return null;
}
/**
 * Placeholder for book-description extraction; this parser does not
 * implement it.
 *
 * <p>NOTE(review): presumably present to satisfy a shared parser contract -
 * confirm against the class declaration.
 *
 * @param bookElement ignored
 * @return always {@code null}
 */
public String getBookContent(String bookElement) {
    // Stub: the argument is never read.
    return null;
}
/**
 * Finds the link to the next result page in a parsed search-result document.
 *
 * <p>Only the first {@code div} whose {@code id} attribute is {@code fanye}
 * is inspected. Within its direct children, the first element named
 * {@code FONT} is located, and the node immediately after it is examined: if
 * that node is named {@code A}, its {@code href} attribute is appended to
 * {@code http://www.china-pub.com/s/} and returned. Any other outcome yields
 * the sentinel {@code "no"}.
 *
 * <p>Called by {@code processService.startMultithread()} according to the
 * original header comment.
 *
 * @param doc parsed result page
 * @return the absolute URL of the next page, or {@code "no"} when there is
 *         no next page
 */
public String getNextPageUrl(Document doc) {
    // Sentinel meaning "there is no next page".
    final String noNextPage = "no";

    NodeList divs = doc.getElementsByTagName("div");
    for (int d = 0; d < divs.getLength(); d++) {
        Node candidate = divs.item(d);
        if (candidate.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        Element pager = (Element) candidate;
        if (!"fanye".equals(pager.getAttribute("id"))) {
            continue;
        }

        // This is the pager div; nothing after it is looked at.
        NodeList pagerChildren = pager.getChildNodes();
        for (int c = 0; c < pagerChildren.getLength(); c++) {
            Node child = pagerChildren.item(c);
            boolean isFontElement = child.getNodeType() == Node.ELEMENT_NODE
                    && "FONT".equals(child.getNodeName());
            if (!isFontElement) {
                continue;
            }

            // Only the node directly following the first FONT element counts;
            // anything other than an A node there means "no next page".
            int following = c + 1;
            if (following < pagerChildren.getLength()) {
                Node afterFont = pagerChildren.item(following);
                if ("A".equals(afterFont.getNodeName())) {
                    Element nextLink = (Element) afterFont;
                    return "http://www.china-pub.com/s/" + nextLink.getAttribute("href");
                }
            }
            return noNextPage;
        }
        return noNextPage;
    }
    return noNextPage;
}
/**
 * Reads the total number of matching records from a parsed search-result
 * document.
 *
 * <p>Only the first {@code div} whose {@code class} attribute is
 * {@code break} is inspected. Among its direct child elements, the first one
 * with {@code class="red14"} whose first child carries a non-null node value
 * supplies the counter text, which is then parsed into a number.
 *
 * @param doc parsed result page
 * @return the record count, or {@code 0} when the counter is missing or its
 *         text cannot be parsed as a number
 */
public long getRecordNum(Document doc) {
    NodeList divs = doc.getElementsByTagName("div");
    for (int i = 0; i < divs.getLength(); i++) {
        Node node = divs.item(i);
        if (node.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        Element div = (Element) node;
        if (!"break".equals(div.getAttribute("class"))) {
            continue;
        }

        // This is the counter container; nothing after it is looked at.
        NodeList children = div.getChildNodes();
        for (int j = 0; j < children.getLength(); j++) {
            Node child = children.item(j);
            if (child.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element span = (Element) child;
            if (!"red14".equals(span.getAttribute("class"))) {
                continue;
            }
            Node first = span.getFirstChild();
            if (first != null && first.getNodeValue() != null) {
                return parseRecordCount(first.getNodeValue());
            }
        }
        return 0;
    }
    return 0;
}

/**
 * Parses the counter text of the result page into a number.
 *
 * <p>When the text contains both {@code (} and {@code 种}, only the part
 * after the first {@code (} and up to two characters before the first
 * {@code 种} is kept. Anything from the first {@code +} onward is then
 * discarded and the remainder is trimmed and parsed.
 *
 * @param text raw counter text; must not be {@code null}
 * @return the parsed count, or {@code 0} when the text is malformed
 */
private static long parseRecordCount(String text) {
    String digits = text;
    int open = digits.indexOf("(");
    int unit = digits.indexOf("种");
    if (open != -1 && unit != -1) {
        int from = open + 1;
        int to = unit - 2;
        // Malformed text such as "(种" would make substring throw.
        if (from > to) {
            return 0;
        }
        digits = digits.substring(from, to);
    }
    int plus = digits.indexOf("+");
    if (plus != -1) {
        // "1000+" means "more than 1000"; keep only the number before the '+'.
        digits = digits.substring(0, plus);
    }
    try {
        return Long.parseLong(digits.trim());
    } catch (NumberFormatException ignored) {
        // 0 is already this method's "count unavailable" value; an unparsable
        // counter should not abort the caller.
        return 0;
    }
}
/**
 * Manual smoke test: fetches one china-pub search-result page, prints the
 * price details returned by {@code getDetailInfo}, then prints one
 * {@code >>}-separated line per book returned by {@code mainService}.
 *
 * @param args unused
 * @throws Exception propagated from the parser calls
 */
public static void main(String[] args) throws Exception {
    China_pubparser parser = new China_pubparser();
    Document doc = parser.nekohtmlParser("http://www.china-pub.com/s/?type=&ref=&tid=0&key1=java");

    Price price = parser.getDetailInfo(doc);
    System.out.println(price.getChina_pubDiscount() + ">>" + price.getChina_pubPrice() + price.getChina_pubUrl());

    ArrayList<Book> books = parser.mainService(doc, false);
    for (Book book : books) {
        String line = book.getBookAuthor() + ">>" + book.getBookFixPrice()
                + ">>" + book.getBookImage() + ">>" + book.getBookISBN()
                + ">>" + book.getBookName() + ">>" + book.getBookProspectus()
                + ">>" + book.getBookPublisher() + ">>" + book.getBookPublishTime()
                + ">>" + book.getPrice().getChina_pubDiscount() + ">>" + book.getPrice().getChina_pubPrice();
        System.out.println(line);
    }
    // getNextPageUrl(doc) and getRecordNum(doc) are not exercised here.
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -