📄 getstarters.java
字号:
package hkjc2.logic;
import java.io.File;
import java.io.FileOutputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFDataFormat;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.tags.ScriptTag;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableRow;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
public class GetStarters {
/**
 * Updates the starters table (race card).
 *
 * <p>Downloads the first starters page, counts the nodes matched by
 * {@code TdNowrapCenterFilter} inside the first {@code Table3Filter} table
 * (that count is used as the number of race pages), derives the URL tag from
 * the first link found in those nodes, extracts the race date as
 * {@code yyyyMMdd} from the page header text, and then delegates to
 * {@code UpdateData}.
 *
 * <p>Returns without doing anything when the page cannot be downloaded or
 * when no page cells are found.
 *
 * @param client HTTP helper used to download the page
 * @param file passed through unchanged to {@code UpdateData}
 * @throws Exception if parsing fails, if the header date does not contain the
 *     expected year/month/day markers, or if {@code UpdateData} fails
 */
public void UpdateStarters(HttpUtil client, File file) throws Exception {
    String url = "http://www.hkjc.com/chinese/racing/startersR1_c.asp";
    String html = client.getHTML(url);
    if (html == null) {
        return;
    }

    Parser parser = new Parser();
    parser.setInputHTML(html);

    int pageCount = 0;
    String tag = null;
    NodeList nl = parser.extractAllNodesThatMatch(Table3Filter);
    if (nl.size() > 0) {
        // Re-parse only the first matched table so the cell filter is scoped to it.
        TableTag table3 = (TableTag) nl.elementAt(0);
        parser.setInputHTML(table3.toHtml());
        nl = parser.extractAllNodesThatMatch(TdNowrapCenterFilter);
        for (int i = 0; i < nl.size(); i++) {
            // Every matched cell counts as one page.
            pageCount++;
            if (tag != null) {
                continue;
            }
            // getChildren() may return null for a node without children.
            NodeList children = nl.elementAt(i).getChildren();
            if (children == null || children.size() == 0) {
                continue;
            }
            if (!(children.elementAt(0) instanceof LinkTag)) {
                continue;
            }
            LinkTag link = (LinkTag) children.elementAt(0);
            tag = link.getLink();
            if (tag != null) {
                // The tag is the second "_"-separated part of the link, when present.
                String[] parts = tag.split("_");
                if (parts.length > 1) {
                    tag = parts[1];
                }
            }
        }
    }

    // No page cells means there is no data: stop here.
    if (pageCount < 1) {
        return;
    }

    String racedate = "";
    parser.setInputHTML(html);
    nl = parser.extractAllNodesThatMatch(FontSize2FaceAHSFilter);
    if (nl.size() > 1) {
        String course = nl.elementAt(1).getParent().toPlainTextString();
        String[] temp = course.split(",");
        if (temp.length > 0) {
            // The first comma-separated field is expected to look like "<year>年<month>月<day>日".
            String date = temp[0].trim();
            int yearIndex = date.indexOf("年");
            int monthIndex = date.indexOf("月");
            int dayIndex = date.indexOf("日");
            if (yearIndex < 0 || monthIndex <= yearIndex + 1 || dayIndex <= monthIndex + 1) {
                throw new IllegalStateException("Unrecognised race date text: " + date);
            }
            String year = date.substring(0, yearIndex);
            String month = date.substring(yearIndex + 1, monthIndex);
            String day = date.substring(monthIndex + 1, dayIndex);
            // Zero-pad to two digits. The previous substring(length - 2, 2) call
            // truncated months 10-12 to "1".
            if (month.length() < 2) {
                month = "0" + month;
            }
            if (day.length() < 2) {
                day = "0" + day;
            }
            racedate = year + month + day;
        }
    }

    UpdateData(client, parser, pageCount, racedate, file, tag);
}
private void UpdateData(HttpUtil client, Parser parser, int pageCount,
String racedate, File file, String tag)
throws Exception, ParserException {
HSSFWorkbook wb = new HSSFWorkbook();
HSSFSheet sheet = wb.createSheet();
int count=0;
String[] head = new String[]{"season", "meetingno", "raceinyr",
"date", "raceno", "distance", "course", "track", "raceclass",
"going", "cup", "horse", "age", "drawing", "brandno", "rating",
"netload", "updn", "bleeding", "reserve", "jockey", "stable", "fp",
"time", "wintime", "pos1", "pos2", "pos3", "pos4", "pos5", "margin",
"winticket", "plcticket", "oddon", "oddbr", "oddfn", "d_win1", "d_win2",
"d_place1", "d_place2", "d_place3", "d_place4", "secttime1", "secttime2",
"secttime3", "secttime4", "secttime5", "secttime6", "d_quin", "bodyweight"
};
HSSFRow headRow = sheet.createRow((short) count);
for (int j = 0; j < head.length; j++) {
HSSFCell cell = headRow.createCell((short) j);
cell.setCellValue(head[j]);
}
count++;
for (int n=0;n<pageCount;n++) {
String D=null;
String E=null;
String F=null;
String G=null;
String H=null;
String I=null;
String J=null;
String K=null;
D = racedate;
J = "TU";
String url = "http://www.hkjc.com/chinese/racing/StartersR"+(n+1)+"_"+tag+"_"+racedate+"_C.asp";
System.out.println("url: " + url);
String html = client.getHTML(url);
parser.setInputHTML(html);
NodeList nl = parser.extractAllNodesThatMatch(FontSize2FaceAHSFilter);
// for (int i=0;i<nl.size();i++) {
// System.out.println(i + ": " + nl.elementAt(i).getParent().toPlainTextString());
// }
if (nl.size() > 1) {
String str = nl.elementAt(1).getParent().toPlainTextString();
String[] temp = str.split(",");
if (temp.length > 2) {
G = temp[2].trim();
if ("沙田".equals(G)) {
G = "田";
} else if ("跑馬地".equals(G)) {
G = "谷";
}
}
}
if (nl.size() > 2) {
String str = nl.elementAt(2).getParent().toPlainTextString();
String[] temp = str.split(",");
if (temp.length > 1) {
H = temp[1].trim();
H = H.replaceAll("\"", "");
}
if (temp.length > 2) {
F = temp[2].trim();
int idx = F.indexOf("米");
if (idx > -1) F = F.substring(0, idx);
}
}
if (nl.size() > 4) {
String str = nl.elementAt(4).getParent().toPlainTextString();
int beginIndex = str.lastIndexOf("第");
int endIndex = str.lastIndexOf("班");
if (beginIndex > -1 && endIndex > -1)
I = str.substring(beginIndex+1, endIndex);
}
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(FontSize2FaceHKSCSFilter);
// for (int i=0;i<nl.size();i++) {
// System.out.println(i + ": " + nl.elementAt(i).getParent().toPlainTextString());
// }
if (nl.size() > 0) {
String str = nl.elementAt(0).getParent().toPlainTextString();
String[] temp = str.split(" ");
if (temp.length > 1) {
E = temp[1].trim();
}
if (temp.length > 4) {
K = temp[4].trim();
K = client.toChinese(K);
}
}
System.out.println("D="+D+" E="+E+" F="+F+" G="+G+" H="+H+" I="+I+" J="+J+" K="+K);
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(ScriptFilter);
// for (int i=0;i<nl.size();i++) {
// System.out.println(i + ": " + nl.elementAt(i).toPlainTextString());
// }
int hourseCount = 1;
if (nl.size() > 0) {
String str = nl.elementAt(0).getParent().toPlainTextString();
int startIndex = str.indexOf("ColPos = new Array(30) ;") + "ColPos = new Array(30) ;".length();
int endIndex = str.indexOf("var PageName");
str = str.substring(startIndex + 1, endIndex);
String[] arr = str.split("\n");
if (arr.length > 2) {
for (int i=2;i<arr.length;i+=2) {
//System.out.println(arr[i]);
String L = null;
String M = null;
String N = null;
String O = null;
String P = null;
String Q = null;
String T = "0";
String U = null;
String offset = null;
String V = null;
String W = null;
String[] cols = arr[i].split("\"");
// for (int j=0;j<cols.length;j++) {
// System.out.println(j + ": " + client.toChinese(cols[j]));
// }
L = client.toChinese(cols[9]);
M = cols[17];
N = cols[27];
O = cols[35];
P = cols[21];
Q = cols[11];
U = client.toChinese(cols[25]).trim();
startIndex = U.indexOf("(");
endIndex = U.indexOf(")");
if (startIndex > -1 && endIndex > -1)
offset = U.substring(startIndex + 1, endIndex);
if (offset != null) {
try {Q = "" + (Integer.parseInt(Q) + Integer.parseInt(offset));}catch(Exception e){}
}
V = client.toChinese(cols[15]).trim();
W = cols[1];
System.out.println("L="+L+" M="+M+" N="+N+" O="+O+" P="+P+" Q="+Q+" U="+U+" V="+V+" W="+W);
List<String> cells = new ArrayList<String>();
for (int m=0;m<50;m++) cells.add(null);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -