📄 weilanparser.java
字号:
* @return String
*/
public String getBookDiscount(Element bookElement) {
    // Returns the discount of a book entry as a "0.xx" string, or "" when none is found.
    // Only the first direct child element with class "priceinfo" is searched; inside it,
    // the first class "discount" element whose first child carries a text value is used.
    String rawText = "";
    NodeList children = bookElement.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
        Node child = children.item(i);
        if (child.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        Element priceBlock = (Element) child;
        if (!"priceinfo".equals(priceBlock.getAttribute("class"))) {
            continue;
        }
        NodeList parts = priceBlock.getChildNodes();
        for (int j = 0; j < parts.getLength(); j++) {
            Node part = parts.item(j);
            if (part.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element field = (Element) part;
            if (!"discount".equals(field.getAttribute("class")) || !field.hasChildNodes()) {
                continue;
            }
            String value = field.getFirstChild().getNodeValue();
            if (value != null) {
                rawText = value.trim();
                break;
            }
        }
        // Only the first "priceinfo" block is ever inspected.
        break;
    }
    if (rawText.length() == 0) {
        return "";
    }
    // Strip the currency sign, the "discount:" label and the trailing discount unit.
    String digits = rawText.replace("¥", "").trim();
    digits = digits.replace("折扣:", "").trim();
    digits = digits.replace("折", "").trim();
    if (digits.length() == 0) {
        // The text held only label/unit characters; do not emit a bare "0.".
        return "";
    }
    return "0." + digits;
}
/**
 * Extracts the image URL of a book entry.
 *
 * <p>Only the first direct child named {@code A} is inspected, and inside it only the first
 * {@code IMG} child element; the value of its {@code src} attribute is returned with
 * surrounding whitespace removed.
 *
 * @param bookElement element whose direct children are searched
 * @return the trimmed {@code src} value, or an empty string when there is no {@code A}
 *         child, no {@code IMG} inside it, or that image has no {@code src} attribute
 */
public String getBookImage(Element bookElement) {
    Node anchor = null;
    NodeList children = bookElement.getChildNodes();
    for (int idx = 0; idx < children.getLength(); idx++) {
        Node child = children.item(idx);
        if ("A".equals(child.getNodeName())) {
            anchor = child;
            break;
        }
    }
    if (anchor == null) {
        return "";
    }

    NodeList anchorChildren = ((Element) anchor).getChildNodes();
    for (int idx = 0; idx < anchorChildren.getLength(); idx++) {
        Node candidate = anchorChildren.item(idx);
        boolean isImage = candidate.getNodeType() == Node.ELEMENT_NODE
                && "IMG".equals(candidate.getNodeName());
        if (!isImage) {
            continue;
        }
        // The first image decides the result even when it has no src.
        Element image = (Element) candidate;
        return image.hasAttribute("src") ? image.getAttribute("src").trim() : "";
    }
    return "";
}
/**
 * Builds the book title from the first direct child named {@code A}.
 *
 * <p>Each child of that anchor contributes one trimmed fragment: an element child contributes
 * the value of its own first child node, any other node contributes its own value. Fragments
 * without a value are skipped and the rest are concatenated without separators.
 *
 * @param bookElement element whose direct children are searched
 * @return the concatenated title, or an empty string when no {@code A} child exists
 */
public String getBookName(Element bookElement) {
    StringBuilder title = new StringBuilder();
    NodeList children = bookElement.getChildNodes();
    for (int idx = 0; idx < children.getLength(); idx++) {
        Node child = children.item(idx);
        if (!"A".equals(child.getNodeName())) {
            continue;
        }
        NodeList fragments = child.getChildNodes();
        for (int k = 0; k < fragments.getLength(); k++) {
            Node fragment = fragments.item(k);
            String text;
            if (fragment.getNodeType() == Node.ELEMENT_NODE) {
                text = fragment.hasChildNodes() ? fragment.getFirstChild().getNodeValue() : null;
            } else {
                text = fragment.getNodeValue();
            }
            if (text != null) {
                title.append(text.trim());
            }
        }
        // Later anchors are never considered.
        break;
    }
    return title.toString();
}
/**
 * Extracts the list (market) price of a book entry.
 *
 * <p>Only the first direct child element with class {@code priceinfo} is searched; inside it
 * the first child element with class {@code oprice} supplies the text. The currency sign
 * {@code ¥} is removed and the result is trimmed.
 *
 * @param bookElement element whose direct children are searched
 * @return the price text without currency sign, or an empty string when the price element
 *         is missing, empty, or its first child carries no text value
 */
public String getBookFixPrice(Element bookElement) {
    String rawPrice = null;
    NodeList children = bookElement.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
        Node child = children.item(i);
        if (child.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        Element priceBlock = (Element) child;
        if (!"priceinfo".equals(priceBlock.getAttribute("class"))) {
            continue;
        }
        NodeList parts = priceBlock.getChildNodes();
        for (int j = 0; j < parts.getLength(); j++) {
            Node part = parts.item(j);
            if (part.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element field = (Element) part;
            if ("oprice".equals(field.getAttribute("class"))) {
                if (field.hasChildNodes()) {
                    // May be null when the first child is itself an element.
                    rawPrice = field.getFirstChild().getNodeValue();
                }
                break;
            }
        }
        // Only the first "priceinfo" block is ever inspected.
        break;
    }
    if (rawPrice == null) {
        return "";
    }
    // Trim after removing the sign so whitespace between sign and number is dropped too.
    return rawPrice.replace("¥", "").trim();
}
/**
 * Extracts the selling price of a book entry.
 *
 * <p>Only the first direct child element with class {@code priceinfo} is searched; inside it
 * the first child element with class {@code sprice} supplies the text. The currency sign
 * {@code ¥} and the "节省:" label are removed and the result is trimmed.
 *
 * @param bookElement element whose direct children are searched
 * @return the cleaned price text, or an empty string when the price element is missing,
 *         empty, or its first child carries no text value
 */
public String getBookPrice(Element bookElement) {
    String rawPrice = null;
    NodeList children = bookElement.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
        Node child = children.item(i);
        if (child.getNodeType() != Node.ELEMENT_NODE) {
            continue;
        }
        Element priceBlock = (Element) child;
        if (!"priceinfo".equals(priceBlock.getAttribute("class"))) {
            continue;
        }
        NodeList parts = priceBlock.getChildNodes();
        for (int j = 0; j < parts.getLength(); j++) {
            Node part = parts.item(j);
            if (part.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element field = (Element) part;
            if ("sprice".equals(field.getAttribute("class"))) {
                if (field.hasChildNodes()) {
                    // May be null when the first child is itself an element.
                    rawPrice = field.getFirstChild().getNodeValue();
                }
                break;
            }
        }
        // Only the first "priceinfo" block is ever inspected.
        break;
    }
    if (rawPrice == null) {
        return "";
    }
    return rawPrice.replace("¥", "").replace("节省:", "").trim();
}
/**
 * Extracts the publication date of a book entry.
 *
 * <p>Every direct child with a non-empty node value is checked for the marker "出版日期:".
 * For a match, the value is trimmed and everything after the marker is taken. All children
 * are scanned, so the last matching child determines the result.
 *
 * @param bookElement element whose direct children are searched
 * @return the text following the marker, or {@code null} when no child contains it
 */
public String getBookPublishTime(Element bookElement) {
    final String marker = "出版日期:";
    String publishTime = null;
    NodeList children = bookElement.getChildNodes();
    for (int idx = 0; idx < children.getLength(); idx++) {
        String raw = children.item(idx).getNodeValue();
        if (raw == null || raw.length() == 0 || !raw.contains(marker)) {
            continue;
        }
        String trimmed = raw.trim();
        // Skip past the marker itself (5 characters).
        publishTime = trimmed.substring(trimmed.indexOf(marker) + marker.length());
    }
    return publishTime;
}
/**
 * Extracts the publisher name of a book entry.
 *
 * <p>Scans the direct children for a node whose value contains "出版社" and then looks at the
 * node immediately after it. If that node is named {@code A}, the trimmed value of its first
 * child is the publisher and the scan stops. If it is anything else, it is skipped and the
 * scan resumes with the node after it.
 *
 * @param bookElement element whose direct children are searched
 * @return the trimmed publisher name, or an empty string when no label/anchor pair is found
 *         or the anchor carries no text value
 */
public String getBookPublisher(Element bookElement) {
    NodeList siblings = bookElement.getChildNodes();
    int cursor = 0;
    while (cursor < siblings.getLength()) {
        String text = siblings.item(cursor++).getNodeValue();
        boolean isLabel = text != null && text.length() > 0 && text.contains("出版社");
        if (!isLabel || cursor >= siblings.getLength()) {
            continue;
        }
        // Consume the node right after the label; it is never tested as a label itself.
        Node next = siblings.item(cursor++);
        if (!"A".equals(next.getNodeName())) {
            continue;
        }
        Element link = (Element) next;
        if (link.hasChildNodes()) {
            String name = link.getFirstChild().getNodeValue();
            if (name != null) {
                return name.trim();
            }
        }
        // The first anchor after a label ends the search even without a usable value.
        return "";
    }
    return "";
}
/**
 * Collects the introduction text of a book from the direct children of an element.
 *
 * <p>An element child contributes the value of its own first child node; any other node
 * contributes its own value. Children without a value are skipped, so the literal text
 * "null" never ends up in the result. Carriage returns, line feeds and spaces are removed
 * from the concatenated text.
 *
 * @param bookElement element whose direct children are read
 * @return the concatenated text with CR, LF and space characters removed; never {@code null}
 */
public String getBookContent(Element bookElement) {
    StringBuilder content = new StringBuilder();
    NodeList children = bookElement.getChildNodes();
    for (int i = 0; i < children.getLength(); i++) {
        Node child = children.item(i);
        String piece;
        if (child.getNodeType() == Node.ELEMENT_NODE) {
            // Null when the element is empty or its first child is itself an element.
            piece = child.hasChildNodes() ? child.getFirstChild().getNodeValue() : null;
        } else {
            piece = child.getNodeValue();
        }
        if (piece != null) {
            content.append(piece);
        }
    }
    return content.toString()
            .replace("\r", "")
            .replace("\n", "")
            .replace(" ", "");
}
/**
 * Extracts the detail-page link of a book entry.
 *
 * <p>Only the first direct child named {@code A} is considered; later anchors are ignored
 * even when the first one has no {@code href}.
 *
 * @param bookElement element whose direct children are searched
 * @return the {@code href} value of the first {@code A} child, or {@code null} when there is
 *         no such child or it has no {@code href} attribute
 */
public String getBookUrl(Element bookElement) {
    NodeList children = bookElement.getChildNodes();
    for (int idx = 0; idx < children.getLength(); idx++) {
        Node child = children.item(idx);
        if ("A".equals(child.getNodeName())) {
            Element link = (Element) child;
            return link.hasAttribute("href") ? link.getAttribute("href") : null;
        }
    }
    return null;
}
/**
 * Second-stage parser that reads the ISBN from a book detail page.
 */
public class WeiLanParserSec {
    /**
     * Downloads the page at {@code url}, parses it into a DOM and extracts the ISBN.
     *
     * <p>The page is read as UTF-8 with a 30 second read timeout. The reader over the
     * connection stream is always closed before the method returns.
     *
     * @param url address of the book detail page
     * @return the ISBN text, or an empty string when it cannot be located
     * @throws IOException when the page cannot be opened or read
     * @throws SAXException when the HTML parser rejects the configuration or the input
     */
    public String getBookISBNSec(String url) throws IOException, SAXException {
        // Create the HTML parser and set its default encoding.
        DOMParser parser = new DOMParser();
        parser.setProperty(
                "http://cyberneko.org/html/properties/default-encoding",
                "UTF-8");

        // Open the connection to the source site.
        URLConnection urlConnection = new URL(url).openConnection();
        urlConnection.setReadTimeout(30000);

        // Wrap the byte stream in a UTF-8 character stream and parse it into a DOM.
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                urlConnection.getInputStream(), "UTF-8"));
        Document doc;
        try {
            parser.parse(new InputSource(reader));
            doc = parser.getDocument();
        } finally {
            reader.close();
        }
        if (doc == null) {
            return "";
        }

        // Only the first DIV with class "product_chanshu" is inspected.
        NodeList divNodeList = doc.getElementsByTagName("DIV");
        for (int i = 0; i < divNodeList.getLength(); i++) {
            Node divNode = divNodeList.item(i);
            if (divNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            Element divElement = (Element) divNode;
            if ("product_chanshu".equals(divElement.getAttribute("class"))) {
                return extractIsbn(divElement);
            }
        }
        return "";
    }

    /**
     * Finds the child element whose first child's text contains "ISBN" and returns the
     * first-child value of the node right after it, provided that node is an element.
     */
    private String extractIsbn(Element container) {
        NodeList children = container.getChildNodes();
        for (int j = 0; j < children.getLength(); j++) {
            Node labelNode = children.item(j);
            if (labelNode.getNodeType() != Node.ELEMENT_NODE || !labelNode.hasChildNodes()) {
                continue;
            }
            String label = labelNode.getFirstChild().getNodeValue();
            if (label == null || label.indexOf("ISBN") == -1) {
                continue;
            }
            // Step to the node right after the label; it is consumed even if unusable.
            j++;
            if (j >= children.getLength()) {
                break;
            }
            Node valueNode = children.item(j);
            if (valueNode.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            if (valueNode.hasChildNodes()) {
                String isbn = valueNode.getFirstChild().getNodeValue();
                if (isbn != null) {
                    return isbn;
                }
            }
            // The first element after a label ends the search even without a text value.
            return "";
        }
        return "";
    }
}
/**
 * Manual smoke test: parses one search-result page, prints one line per book found and
 * finally prints the URL of the next result page.
 *
 * @param args ignored
 * @throws Exception whatever the parser calls propagate
 */
public static void main(String[] args) throws Exception {
    final String searchUrl = "http://search.wl.cn/search.aspx?q=java&producttype=0&index=1";
    WeilanParser parser = new WeilanParser();
    Document page = parser.nekohtmlParser(searchUrl);

    ArrayList<Book> books = parser.mainService(page, true);
    for (Book book : books) {
        String line = ">>" + book.getBookISBN()
                + ">>" + book.getBookAuthor()
                + ">>" + book.getBookPublishTime()
                + ">>" + book.getBookPublisher()
                + ">>" + book.getPrice().getWeilanPrice()
                + ">>" + book.getPrice().getWeilanDiscount()
                + ">>" + book.getPrice().getWeilanUrl()
                + ">>" + book.getBookProspectus()
                + ">>" + book.getBookFixPrice()
                + ">>" + book.getBookImage()
                + ">>" + book.getBookName();
        System.out.println(line);
    }

    System.out.println(parser.getNextPageUrl(page));
}
/**
 * Placeholder ISBN lookup: the body returns {@code null} unconditionally and never reads
 * {@code bookElement}.
 *
 * @param bookElement not used by this implementation
 * @return always {@code null}
 */
public String getBookISBN(Element bookElement) {
return null;
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -