⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 getstarters.java

📁 用 Java 编写的读取香港赛马比赛数据、分析并导入到 Excel 的源码,可供研究使用。
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
package hkjc2.logic;

import java.io.File;
import java.io.FileOutputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;

import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFDataFormat;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.ScriptTag;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableRow;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

public class GetStarters {

	/**
	 * Updates the starters table.
	 *
	 * <p>Downloads the first starters page, counts the page-number cells found
	 * inside the first node matched by {@code Table3Filter}, derives the tag
	 * from the first page link, reads the race date from the page header and
	 * then delegates to {@code UpdateData}.
	 *
	 * <p>Returns without doing anything when the page cannot be downloaded
	 * ({@code getHTML} returned {@code null}) or when no page-number cell is
	 * found.
	 *
	 * @param client HTTP helper used to download the pages
	 * @param file   passed through unchanged to {@code UpdateData}
	 * @throws Exception if parsing fails or {@code UpdateData} throws; a
	 *         {@link StringIndexOutOfBoundsException} is raised when the date
	 *         header lacks the expected year/month/day markers
	 */
	public void UpdateStarters(HttpUtil client, File file) throws Exception {
		String url = "http://www.hkjc.com/chinese/racing/startersR1_c.asp";
		String html = client.getHTML(url);
		if (html == null) {
			return;
		}
		Parser parser = new Parser();
		parser.setInputHTML(html);

		int pageCount = 0;
		String tag = null;
		NodeList nl = parser.extractAllNodesThatMatch(Table3Filter);
		if (nl.size() > 0) {
			TableTag Table3 = (TableTag) nl.elementAt(0);
			// Re-parse only the matched table so the cell filter sees nothing else.
			parser.setInputHTML(Table3.toHtml());
			nl = parser.extractAllNodesThatMatch(TdNowrapCenterFilter);
			for (int i = 0; i < nl.size(); i++) {
				// Every matching cell counts as one page.
				pageCount++;
				if (tag != null) {
					continue;
				}
				NodeList children = nl.elementAt(i).getChildren();
				// getChildren() yields null for a node that has no children.
				if (children == null || children.size() == 0) {
					continue;
				}
				if (children.elementAt(0) instanceof LinkTag) {
					LinkTag link = (LinkTag) children.elementAt(0);
					tag = link.getLink();
					if (tag != null) {
						// Keep the second "_"-separated segment of the link.
						String[] arr = tag.split("_");
						if (arr.length > 1) {
							tag = arr[1];
						}
					}
				}
			}
		}
		// No page numbers means there is no data: stop here.
		if (pageCount < 1) {
			return;
		}

		String racedate = "";
		parser.setInputHTML(html);
		nl = parser.extractAllNodesThatMatch(FontSize2FaceAHSFilter);
		if (nl.size() > 1) {
			String course = nl.elementAt(1).getParent().toPlainTextString();
			String[] temp = course.split(",");
			if (temp.length > 0) {
				// The first comma-separated field holds the date, delimited by
				// the year, month and day characters.
				String date = temp[0].trim();
				int yearIndex = date.indexOf("年");
				int monthIndex = date.indexOf("月");
				int dayIndex = date.indexOf("日");
				String racedateYear = date.substring(0, yearIndex);
				// Left-pad the month with "0" and keep the last two characters.
				// The end index must not be the literal 2: that truncated
				// two-digit months ("012" became "1").
				String racedateMonth = "0" + date.substring(yearIndex + 1, monthIndex);
				racedateMonth = racedateMonth.substring(racedateMonth.length() - 2);
				// NOTE(review): the day is used as found and is not zero-padded,
				// unlike the month - confirm the format UpdateData expects.
				String racedateDay = date.substring(monthIndex + 1, dayIndex);
				racedate = racedateYear + racedateMonth + racedateDay;
			}
		}

		UpdateData(client, parser, pageCount, racedate, file, tag);
	}

	private void UpdateData(HttpUtil client, Parser parser, int pageCount,
			String racedate, File file, String tag)
			throws Exception, ParserException {
		HSSFWorkbook wb = new HSSFWorkbook();
        HSSFSheet sheet = wb.createSheet();
        int count=0;
        String[] head = new String[]{"season", "meetingno", "raceinyr",
                "date", "raceno", "distance", "course", "track", "raceclass",
                "going", "cup", "horse", "age", "drawing", "brandno", "rating",
                "netload", "updn", "bleeding", "reserve", "jockey", "stable", "fp",
                "time", "wintime", "pos1", "pos2", "pos3", "pos4", "pos5", "margin",
                "winticket", "plcticket", "oddon", "oddbr", "oddfn", "d_win1", "d_win2",
                "d_place1", "d_place2", "d_place3", "d_place4", "secttime1", "secttime2",
                "secttime3", "secttime4", "secttime5", "secttime6", "d_quin", "bodyweight"
            };
        HSSFRow headRow = sheet.createRow((short) count);
        for (int j = 0; j < head.length; j++) {
            HSSFCell cell = headRow.createCell((short) j);
            cell.setCellValue(head[j]);
        }
        count++;
        
		for (int n=0;n<pageCount;n++) {
			String D=null;
			String E=null;
			String F=null;
			String G=null;
			String H=null;
			String I=null;
			String J=null;
			String K=null;
			D = racedate;
			J = "TU";
			String url = "http://www.hkjc.com/chinese/racing/StartersR"+(n+1)+"_"+tag+"_"+racedate+"_C.asp";
			System.out.println("url: " + url);
			String html = client.getHTML(url);
			parser.setInputHTML(html);
			NodeList nl = parser.extractAllNodesThatMatch(FontSize2FaceAHSFilter);
//			for (int i=0;i<nl.size();i++) {
//				System.out.println(i + ": " + nl.elementAt(i).getParent().toPlainTextString());
//			}
			if (nl.size() > 1) {
				String str = nl.elementAt(1).getParent().toPlainTextString();
				String[] temp = str.split(",");
				if (temp.length > 2) {
					G = temp[2].trim();
					if ("沙田".equals(G)) {
						G = "田";
					} else if ("跑馬地".equals(G)) {
						G = "谷";
					}
				}
			}
			if (nl.size() > 2) {
				String str = nl.elementAt(2).getParent().toPlainTextString();
				String[] temp = str.split(",");
				if (temp.length > 1) {
					H = temp[1].trim();
					H = H.replaceAll("\"", "");
				}
				if (temp.length > 2) {
					F = temp[2].trim();
					int idx = F.indexOf("米");
					if (idx > -1) F = F.substring(0, idx);
				}
			}
			if (nl.size() > 4) {
				String str = nl.elementAt(4).getParent().toPlainTextString();
				int beginIndex = str.lastIndexOf("第");
				int endIndex = str.lastIndexOf("班");
				if (beginIndex > -1 && endIndex > -1) 
					I = str.substring(beginIndex+1, endIndex);
			}
			parser.setInputHTML(html);
			nl = parser.extractAllNodesThatMatch(FontSize2FaceHKSCSFilter);
//				for (int i=0;i<nl.size();i++) {
//					System.out.println(i + ": " + nl.elementAt(i).getParent().toPlainTextString());
//				}
			if (nl.size() > 0) {
				String str = nl.elementAt(0).getParent().toPlainTextString();
				String[] temp = str.split(" ");
				if (temp.length > 1) {
					E = temp[1].trim();
				}
				if (temp.length > 4) {
					K = temp[4].trim();
					K = client.toChinese(K);
				}
			}
			
			System.out.println("D="+D+" E="+E+" F="+F+" G="+G+" H="+H+" I="+I+" J="+J+" K="+K);
			
			parser.setInputHTML(html);
			nl = parser.extractAllNodesThatMatch(ScriptFilter);
//				for (int i=0;i<nl.size();i++) {
//					System.out.println(i + ": " + nl.elementAt(i).toPlainTextString());
//				}
			int hourseCount = 1;
			if (nl.size() > 0) {
				String str = nl.elementAt(0).getParent().toPlainTextString();
				int startIndex = str.indexOf("ColPos = new Array(30) ;") + "ColPos = new Array(30) ;".length();
				int endIndex = str.indexOf("var PageName");
				str = str.substring(startIndex + 1, endIndex);
				String[] arr = str.split("\n");
				if (arr.length > 2) {
					for (int i=2;i<arr.length;i+=2) {
						//System.out.println(arr[i]);
						String L = null;
						String M = null;
						String N = null;
						String O = null;
						String P = null;
						String Q = null;
						String T = "0";
						String U = null;
						String offset = null;
						String V = null;
						String W = null;
						String[] cols = arr[i].split("\"");
//						for (int j=0;j<cols.length;j++) {
//							System.out.println(j + ": " + client.toChinese(cols[j]));
//						}
						L = client.toChinese(cols[9]);
						M = cols[17];
						N = cols[27];
						O = cols[35];
						P = cols[21];
						Q = cols[11];
						U = client.toChinese(cols[25]).trim();
						startIndex = U.indexOf("(");
						endIndex = U.indexOf(")");
						if (startIndex > -1 && endIndex > -1)
							offset = U.substring(startIndex + 1, endIndex);
						if (offset != null) {
							try {Q = "" + (Integer.parseInt(Q) + Integer.parseInt(offset));}catch(Exception e){}
						}
						
						V = client.toChinese(cols[15]).trim();
						W = cols[1];
						System.out.println("L="+L+" M="+M+" N="+N+" O="+O+" P="+P+" Q="+Q+" U="+U+" V="+V+" W="+W);

			    		List<String> cells = new ArrayList<String>();
			    		for (int m=0;m<50;m++) cells.add(null);

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -