⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 getresults.java

📁 用 Java 编写的读取香港赛马比赛数据、分析并导入到 Excel 的源码,可供研究用。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
package hkjc2.logic;

import java.io.File;
import java.io.FileOutputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFDataFormat;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableRow;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

import com.singularsys.jep.Jep;

public class GetResults {

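	/**
	 * Builds an Excel workbook from the hkjc.com race-result pages of one race day.
	 * <p>
	 * Returns without doing anything when the first results page cannot be
	 * fetched, or when no race page numbers are found on it (treated as "no data").
	 *
	 * @param client   HTTP helper used to download the result pages
	 * @param racedate race date; must contain three "/"-separated parts. It is sent
	 *                 as-is in the request URL, and its parts are concatenated in
	 *                 reverse order for the "date" column
	 *                 (presumably dd/MM/yyyy - TODO confirm)
	 * @param file     presumably the Excel file to write - TODO confirm
	 * @throws Exception on unrecoverable errors, e.g. HTML parsing failures
	 *                   (TODO confirm the full list of causes)
	 */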
	public void genExcel(HttpUtil client, String racedate, File file) throws Exception {
		String[] te = racedate.split("/");
		String rd = te[2]+te[1]+te[0];
		String D = rd;
		String url = "http://www.hkjc.com/chinese/racing/results.asp?racedate="+racedate;
		String html = client.getHTML(url);
		if (html == null) {
			return;
		}
		Parser parser = new Parser();
		parser.setInputHTML(html);
		
		int pageCount = 0;
		NodeList nl = parser.extractAllNodesThatMatch(PageTableFilter);
        if (nl.size() > 0) {
        	TableTag Table3 = (TableTag)nl.elementAt(0);
        	String Table3Html = Table3.toHtml();
        	parser.setInputHTML(Table3Html);
            nl = parser.extractAllNodesThatMatch(TdNowrapCenterFilter);
            for (int i=0;i<nl.size();i++) {
            	pageCount ++;
            }
        }
        //System.out.println(pageCount);
        //如果无数据,结束
		//没有页码,则表示无数据
        if (pageCount < 1) {
        	return;
        }
        
        String C = null;
        parser.setInputHTML(html);
        nl = parser.extractAllNodesThatMatch(InfoTableFilter);
        if (nl.size() > 0) {
        	String tableHtml = nl.elementAt(0).toHtml();
        	parser.setInputHTML(tableHtml);
        	NodeList nl1 = parser.extractAllNodesThatMatch(TdFilter);
//        	for (int i=0;i<nl1.size();i++) {
//        		System.out.println(i+": " + nl1.elementAt(i).toPlainTextString());
//        	}
        	if (nl1.size() > 0) {
        		String str = nl1.elementAt(0).toPlainTextString();
				int beginIndex = str.indexOf("(");
				int endIndex = str.indexOf(")");
				if (beginIndex > -1 && endIndex > -1)
					C = str.substring(beginIndex+1, endIndex);
        	}
        }
        
        String G = null;
        parser.setInputHTML(html);
        nl = parser.extractAllNodesThatMatch(fontgeneral_textFilter);
        if (nl.size() > 0) {
        	String str = nl.elementAt(0).getParent().toPlainTextString();
        	String[] arr = str.trim().split("\n");
        	if (arr.length > 0) {
        		G = arr[arr.length-1].trim();
        		if ("沙田".equals(G)) {
					G = "田";
				} else if ("跑馬地".equals(G)) {
					G = "谷";
				}
        	}
        }
        
        HSSFWorkbook wb = new HSSFWorkbook();
        HSSFSheet sheet = wb.createSheet();
        int count=0;
        String[] head = new String[]{"season", "meetingno", "raceinyr",
                "date", "raceno", "distance", "course", "track", "raceclass",
                "going", "cup", "horse", "age", "drawing", "brandno", "rating",
                "netload", "updn", "bleeding", "reserve", "jockey", "stable", "fp",
                "time", "wintime", "pos1", "pos2", "pos3", "pos4", "pos5", "margin",
                "winticket", "plcticket", "oddon", "oddbr", "oddfn", "d_win1", "d_win2",
                "d_place1", "d_place2", "d_place3", "d_place4", "secttime1", "secttime2",
                "secttime3", "secttime4", "secttime5", "secttime6", "d_quin", "bodyweight"
            };
        HSSFRow headRow = sheet.createRow((short) count);
        for (int j = 0; j < head.length; j++) {
            HSSFCell cell = headRow.createCell((short) j);
            cell.setCellValue(head[j]);
        }
        count++;
        
		//读取指定日期的网页
		for (int n=0;n<pageCount;n++) {
			String E = null;
			String F = null;
			String H = null;
			String I = null;
			String J = null;
			String K = null;
			String AQ = null;
			String AR = null;
			String AS = null;
			String AT = null;
			String AU = null;
			String AV = null;
			String AK = null;
			String AM = null;
			String AN = null;
			String AO = null;
			String AW = null;
			url = "http://www.hkjc.com/chinese/racing/results.asp?racedate="+racedate+"&raceno="+(n+1);
			html = client.getHTML(url);
			if (html == null) continue;
			
			List<List<String>> rows = new ArrayList<List<String>>();
			//读取每一页的数据,并保存至数据库,可能是更新也可能是新增
			parser.setInputHTML(html);
			nl = parser.extractAllNodesThatMatch(InfoTableFilter);
	        if (nl.size() > 0) {
	        	String tableHtml = nl.elementAt(0).toHtml();
	        	parser.setInputHTML(tableHtml);
	        	NodeList nl1 = parser.extractAllNodesThatMatch(TdFilter);
//		        	for (int i=0;i<nl1.size();i++) {
//		        		System.out.println(i+": " + nl1.elementAt(i).toPlainTextString());
//		        	}
	        	if (nl1.size() > 0) {
	        		String str = nl1.elementAt(0).toPlainTextString();
					int beginIndex = str.indexOf("(");
					int endIndex = str.indexOf(")");
					if (beginIndex > -1 && endIndex > -1)
						C = str.substring(beginIndex+1, endIndex);
					
					beginIndex = str.indexOf("第");
					endIndex = str.indexOf("場");
					if (beginIndex > -1 && endIndex > -1)
						E = str.substring(beginIndex+1, endIndex).replaceAll("&nbsp;", "").trim();
	        	}
	        	if (nl1.size() > 2) {
	        		String str = nl1.elementAt(2).toPlainTextString();
					int beginIndex = str.indexOf("第");
					int endIndex = str.indexOf("班");
					if (beginIndex > -1 && endIndex > -1)
						I = str.substring(beginIndex+1, endIndex);
					
					beginIndex = str.indexOf("-");
					endIndex = str.indexOf("米");
					if (beginIndex > -1 && endIndex > -1)
						F = str.substring(beginIndex+1, endIndex).replaceAll("&nbsp", "").replaceAll(";", "").trim();
	        	}
	        	if (nl1.size() > 3) {
	        		String str = nl1.elementAt(3).toPlainTextString();
	        		str = str.replaceAll("\n", "");
	        		str = str.replaceAll("\r", "");
					K = client.toChinese(str).trim();
	        	}
	        	if (nl1.size() > 8) {
	        		String str = nl1.elementAt(8).toPlainTextString();
	        		str = str.replaceAll("\n", "");
	        		str = str.replaceAll("\r", "");
					J = client.toChinese(str).trim();
	        	}
	        	if (nl1.size() > 10) {
	        		String str = nl1.elementAt(10).toPlainTextString();
	        		int beginIndex = str.indexOf("\"");
					int endIndex = str.lastIndexOf("\"");
					if (beginIndex > -1 && endIndex > -1)
						H = str.substring(beginIndex+1, endIndex);
	        	}
	        	for (int i=0;i<nl1.size();i++) {
	        		if ("分段時間 :".equals(nl1.elementAt(i).toPlainTextString().trim())) {
	        			List<String> AQV = new ArrayList<String>();
	        			for (int j=i+1;j<nl1.size() && j<i+7;j++) {
	        				AQV.add(nl1.elementAt(j).toPlainTextString().trim());
	        			}
        				if (AQV.size() > 0) AV = AQV.get(AQV.size() - 1);
        				if (AQV.size() > 1) AU = AQV.get(AQV.size() - 2);
        				if (AQV.size() > 2) AT = AQV.get(AQV.size() - 3);
        				if (AQV.size() > 3) AS = AQV.get(AQV.size() - 4);
        				if (AQV.size() > 4) AR = AQV.get(AQV.size() - 5);
        				if (AQV.size() > 5) AQ = AQV.get(AQV.size() - 6);
	        			break;
	        		}
	        	}
	        	
	        	parser.setInputHTML(html);
	            nl = parser.extractAllNodesThatMatch(Table450S1P0B0Filter);
	            if (nl.size() > 0) {
	            	String Table450S1P0B0HTML = nl.elementAt(0).toHtml();
	            	parser.setInputHTML(Table450S1P0B0HTML);
	            	nl = parser.extractAllNodesThatMatch(font2ArialFilter);
//		            	for (int i=0;i<nl.size();i++) {
//		            		System.out.println(i + ": " + nl.elementAt(i).getParent().toPlainTextString());
//		            	}
	            	if (nl.size() > 2) AK = nl.elementAt(2).getParent().toPlainTextString();
	            	if (nl.size() > 5) AM = nl.elementAt(5).getParent().toPlainTextString();
	            	if (nl.size() > 7) AN = nl.elementAt(7).getParent().toPlainTextString();
	            	if (nl.size() > 9) AO = nl.elementAt(9).getParent().toPlainTextString();
	            	if (nl.size() > 12) AW = nl.elementAt(12).getParent().toPlainTextString();
	            }
	            System.out.println("C="+C+" D="+rd+" E="+E+" F="+F+" G="+G+" H="+H+" I="+I+" J="+J+" AQ="+AQ+" AR="+AR+" AS="+AS+" AT="+AT+" AU="+AU+" AV="+AV
	            		+" AK="+AK+" AM="+AM+" AN="+AN+" AO="+AO+" AW="+AW);
	            
	        	parser.setInputHTML(html);
	        	NodeList nlList = parser.extractAllNodesThatMatch(ListTableFilter);
	        	if (nlList.size() > 0) {
	        		NodeList Rows = null;
	                String ListHtml = nlList.elementAt(0).toHtml();
                    parser.setInputHTML(ListHtml);
                    Rows = parser.extractAllNodesThatMatch(TrFilter);
                    String Y = null;
	                for (int i = 1; i < Rows.size(); i++) {
	                	String L = null;
	                	String N = null;
	                	String O = null;
	                	String Q = null;
	                	String U = null;
	                	String V = null;
	                	String W = null;
	                	String X = null;
	                	String AE = null;
	                	String AJ = null;
	                	String AX = null;
	                    Node node = Rows.elementAt(i);
	                    String RowHtml = node.toHtml();
	                    parser.setInputHTML(RowHtml);
	                    try {
	                    	NodeList Cells = parser.extractAllNodesThatMatch(TdFilter);
	                    	for (int j=0;j<Cells.size();j++) {
	                    		//System.out.println(j + ": " + Cells.elementAt(j).toPlainTextString());
	                    		if (Cells.size() > 5) Q = Cells.elementAt(5).toPlainTextString();
	                    		if (Cells.size() > 0) W = Cells.elementAt(0).toPlainTextString();
	                    		if (Cells.size() > 3) U = client.toChinese(Cells.elementAt(3).toPlainTextString());
	                    		if (Cells.size() > 4) V = client.toChinese(Cells.elementAt(4).toPlainTextString());
	                    		if (Cells.size() > 9) X = Cells.elementAt(9).toPlainTextString();
	                    		if (i == 1) Y = X;
	                    		if (Cells.size() > 7) N = Cells.elementAt(7).toPlainTextString();
	                    		if (Cells.size() > 8) {
	                    			AE = Cells.elementAt(8).toPlainTextString().replaceAll("&nbsp;", "").trim();
	                    			if ("-".equals(AE)) {
	                    				AE = "0";
	                    			} else if ("短馬頭位".equals(AE)) {
	                    				AE = "0.1";
	                    			} else if ("一頭位".equals(AE)) {
	                    				AE = "0.2";
	                    			} else if ("頸位".equals(AE)) {
	                    				AE = "0.5";
	                    			} else if ("多個馬位".equals(AE)) {
	                    				AE = "99";
	                    			}
	                    		}
	                    		if (Cells.size() > 10) AJ = Cells.elementAt(10).toPlainTextString();
	                    		if (Cells.size() > 6) AX = Cells.elementAt(6).toPlainTextString();
	                    		if (Cells.size() > 2) {
	                    			O = Cells.elementAt(2).toPlainTextString();
	                    			int beginIndex = O.indexOf("(");
	                    			if (beginIndex > -1)
	                    				L = client.toChinese(O.substring(0, beginIndex));
									int endIndex = O.indexOf(")");
									if (beginIndex > -1 && endIndex > -1)
										O = O.substring(beginIndex+1, endIndex);
	                    		}
	                    		System.out.println(" L="+L+" N="+N+" O="+O+" U="+U+" V="+V+"W="+W+" X="+X+" Y="+Y+" AE="+AE+" AJ="+AJ+" AX="+AX);
	                    	}
	                    } catch (ParserException e) {
	                        e.printStackTrace();
	                    }
	                    
	                    List<String> cells = new ArrayList<String>();
			    		for (int m=0;m<50;m++) cells.add(null);
			    		cells.set(2, C);
			    		cells.set(3, D);
			    		cells.set(4, E);
			    		cells.set(5, F);
			    		cells.set(6, G);
			    		cells.set(7, H);
			    		cells.set(8, I);
			    		cells.set(9, J);
			    		cells.set(10, K);
			    		cells.set(11, L);
			    		cells.set(13, N);
			    		cells.set(14, O);
			    		cells.set(16, Q);
			    		cells.set(20, U);
			    		cells.set(21, V);
			    		cells.set(22, W);
			    		cells.set(23, X);
			    		cells.set(24, Y);
			    		cells.set(30, AE);
			    		cells.set(35, AJ);
			    		cells.set(36, AK);
			    		cells.set(38, AM);
			    		cells.set(39, AN);
			    		cells.set(40, AO);
			    		cells.set(42, AQ);
			    		cells.set(43, AR);
			    		cells.set(44, AS);
			    		cells.set(45, AT);
			    		cells.set(46, AU);
			    		cells.set(47, AV);
			    		cells.set(48, AW);
			    		cells.set(49, AX);
			    		rows.add(cells);
	                }
	        	}
	        }
	        
	        //如果有分段时间及位置,读取分段时间及位置
        	String secHtml = client.getHTML("http://www.hkjc.com/chinese/racing/display_sectionaltime.asp?RaceDate="+racedate+"&Raceno="+(n+1));
//	    		System.out.println(srcHtml);
    		if (secHtml == null) continue;
    		
    		parser.setInputHTML(secHtml);
        	NodeList nlSecTable = parser.extractAllNodesThatMatch(SecTableFilter);
//	        	for (int i=0;i<nlSecTable.size();i++) {
//	        		System.out.println(i + ": " + nlSecTable.elementAt(i).toHtml());
//	        	}
        	if (nlSecTable.size() > 0) {
        		TableTag table = (TableTag)nlSecTable.elementAt(0);
        		List<TableRow> Rows = new ArrayList<TableRow>();
        		for (int i=0;i<table.getChildren().size();i++) {
        			Node child = table.getChildren().elementAt(i); 

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -