📄 getresults.java
字号:
package hkjc2.logic;
import java.io.File;
import java.io.FileOutputStream;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import org.apache.poi.hssf.usermodel.HSSFCell;
import org.apache.poi.hssf.usermodel.HSSFCellStyle;
import org.apache.poi.hssf.usermodel.HSSFDataFormat;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.Tag;
import org.htmlparser.tags.TableColumn;
import org.htmlparser.tags.TableRow;
import org.htmlparser.tags.TableTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
import com.singularsys.jep.Jep;
public class GetResults {
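/**
* Builds an Excel workbook from the HKJC race results pages of one race day.
* <p>
* The results page for {@code racedate} is fetched first. The method returns
* early, without doing anything else, when that page cannot be fetched
* ({@code getHTML} returns {@code null}) or when no race-number cells are found
* in the paging table. Otherwise one results page per race number is fetched
* and parsed, and the extracted values are collected row by row.
*
* @param client HTTP helper used to download pages ({@code getHTML}) and to
*               convert text ({@code toChinese})
* @param racedate race date; split on "/" into three parts that are
*               concatenated in reverse order for the "date" column, and also
*               appended verbatim to the request URLs (presumably dd/MM/yyyy -
*               TODO confirm against callers)
* @param file presumably the output Excel file; it is not referenced in the
*               part of the method reviewed here - TODO confirm
* @throws Exception any failure propagated from the calls made here, e.g. a
*               {@link ParserException} raised by the HTML parser
*/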
public void genExcel(HttpUtil client, String racedate, File file) throws Exception {
String[] te = racedate.split("/");
String rd = te[2]+te[1]+te[0];
String D = rd;
String url = "http://www.hkjc.com/chinese/racing/results.asp?racedate="+racedate;
String html = client.getHTML(url);
if (html == null) {
return;
}
Parser parser = new Parser();
parser.setInputHTML(html);
int pageCount = 0;
NodeList nl = parser.extractAllNodesThatMatch(PageTableFilter);
if (nl.size() > 0) {
TableTag Table3 = (TableTag)nl.elementAt(0);
String Table3Html = Table3.toHtml();
parser.setInputHTML(Table3Html);
nl = parser.extractAllNodesThatMatch(TdNowrapCenterFilter);
for (int i=0;i<nl.size();i++) {
pageCount ++;
}
}
//System.out.println(pageCount);
// Stop here when there is no data:
// no page numbers on the results page means no races were found.
if (pageCount < 1) {
return;
}
String C = null;
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(InfoTableFilter);
if (nl.size() > 0) {
String tableHtml = nl.elementAt(0).toHtml();
parser.setInputHTML(tableHtml);
NodeList nl1 = parser.extractAllNodesThatMatch(TdFilter);
// for (int i=0;i<nl1.size();i++) {
// System.out.println(i+": " + nl1.elementAt(i).toPlainTextString());
// }
if (nl1.size() > 0) {
String str = nl1.elementAt(0).toPlainTextString();
int beginIndex = str.indexOf("(");
int endIndex = str.indexOf(")");
if (beginIndex > -1 && endIndex > -1)
C = str.substring(beginIndex+1, endIndex);
}
}
String G = null;
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(fontgeneral_textFilter);
if (nl.size() > 0) {
String str = nl.elementAt(0).getParent().toPlainTextString();
String[] arr = str.trim().split("\n");
if (arr.length > 0) {
G = arr[arr.length-1].trim();
if ("沙田".equals(G)) {
G = "田";
} else if ("跑馬地".equals(G)) {
G = "谷";
}
}
}
HSSFWorkbook wb = new HSSFWorkbook();
HSSFSheet sheet = wb.createSheet();
int count=0;
String[] head = new String[]{"season", "meetingno", "raceinyr",
"date", "raceno", "distance", "course", "track", "raceclass",
"going", "cup", "horse", "age", "drawing", "brandno", "rating",
"netload", "updn", "bleeding", "reserve", "jockey", "stable", "fp",
"time", "wintime", "pos1", "pos2", "pos3", "pos4", "pos5", "margin",
"winticket", "plcticket", "oddon", "oddbr", "oddfn", "d_win1", "d_win2",
"d_place1", "d_place2", "d_place3", "d_place4", "secttime1", "secttime2",
"secttime3", "secttime4", "secttime5", "secttime6", "d_quin", "bodyweight"
};
HSSFRow headRow = sheet.createRow((short) count);
for (int j = 0; j < head.length; j++) {
HSSFCell cell = headRow.createCell((short) j);
cell.setCellValue(head[j]);
}
count++;
// Fetch the results page of every race for the given date
for (int n=0;n<pageCount;n++) {
String E = null;
String F = null;
String H = null;
String I = null;
String J = null;
String K = null;
String AQ = null;
String AR = null;
String AS = null;
String AT = null;
String AU = null;
String AV = null;
String AK = null;
String AM = null;
String AN = null;
String AO = null;
String AW = null;
url = "http://www.hkjc.com/chinese/racing/results.asp?racedate="+racedate+"&raceno="+(n+1);
html = client.getHTML(url);
if (html == null) continue;
List<List<String>> rows = new ArrayList<List<String>>();
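// Read the data of each page and save it to the database (either an update or an insert).
// NOTE(review): the code below only collects the values into `rows`; no database write is visible here - confirm.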
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(InfoTableFilter);
if (nl.size() > 0) {
String tableHtml = nl.elementAt(0).toHtml();
parser.setInputHTML(tableHtml);
NodeList nl1 = parser.extractAllNodesThatMatch(TdFilter);
// for (int i=0;i<nl1.size();i++) {
// System.out.println(i+": " + nl1.elementAt(i).toPlainTextString());
// }
if (nl1.size() > 0) {
String str = nl1.elementAt(0).toPlainTextString();
int beginIndex = str.indexOf("(");
int endIndex = str.indexOf(")");
if (beginIndex > -1 && endIndex > -1)
C = str.substring(beginIndex+1, endIndex);
beginIndex = str.indexOf("第");
endIndex = str.indexOf("場");
if (beginIndex > -1 && endIndex > -1)
E = str.substring(beginIndex+1, endIndex).replaceAll(" ", "").trim();
}
if (nl1.size() > 2) {
String str = nl1.elementAt(2).toPlainTextString();
int beginIndex = str.indexOf("第");
int endIndex = str.indexOf("班");
if (beginIndex > -1 && endIndex > -1)
I = str.substring(beginIndex+1, endIndex);
beginIndex = str.indexOf("-");
endIndex = str.indexOf("米");
if (beginIndex > -1 && endIndex > -1)
F = str.substring(beginIndex+1, endIndex).replaceAll(" ", "").replaceAll(";", "").trim();
}
if (nl1.size() > 3) {
String str = nl1.elementAt(3).toPlainTextString();
str = str.replaceAll("\n", "");
str = str.replaceAll("\r", "");
K = client.toChinese(str).trim();
}
if (nl1.size() > 8) {
String str = nl1.elementAt(8).toPlainTextString();
str = str.replaceAll("\n", "");
str = str.replaceAll("\r", "");
J = client.toChinese(str).trim();
}
if (nl1.size() > 10) {
String str = nl1.elementAt(10).toPlainTextString();
int beginIndex = str.indexOf("\"");
int endIndex = str.lastIndexOf("\"");
if (beginIndex > -1 && endIndex > -1)
H = str.substring(beginIndex+1, endIndex);
}
for (int i=0;i<nl1.size();i++) {
if ("分段時間 :".equals(nl1.elementAt(i).toPlainTextString().trim())) {
List<String> AQV = new ArrayList<String>();
for (int j=i+1;j<nl1.size() && j<i+7;j++) {
AQV.add(nl1.elementAt(j).toPlainTextString().trim());
}
if (AQV.size() > 0) AV = AQV.get(AQV.size() - 1);
if (AQV.size() > 1) AU = AQV.get(AQV.size() - 2);
if (AQV.size() > 2) AT = AQV.get(AQV.size() - 3);
if (AQV.size() > 3) AS = AQV.get(AQV.size() - 4);
if (AQV.size() > 4) AR = AQV.get(AQV.size() - 5);
if (AQV.size() > 5) AQ = AQV.get(AQV.size() - 6);
break;
}
}
parser.setInputHTML(html);
nl = parser.extractAllNodesThatMatch(Table450S1P0B0Filter);
if (nl.size() > 0) {
String Table450S1P0B0HTML = nl.elementAt(0).toHtml();
parser.setInputHTML(Table450S1P0B0HTML);
nl = parser.extractAllNodesThatMatch(font2ArialFilter);
// for (int i=0;i<nl.size();i++) {
// System.out.println(i + ": " + nl.elementAt(i).getParent().toPlainTextString());
// }
if (nl.size() > 2) AK = nl.elementAt(2).getParent().toPlainTextString();
if (nl.size() > 5) AM = nl.elementAt(5).getParent().toPlainTextString();
if (nl.size() > 7) AN = nl.elementAt(7).getParent().toPlainTextString();
if (nl.size() > 9) AO = nl.elementAt(9).getParent().toPlainTextString();
if (nl.size() > 12) AW = nl.elementAt(12).getParent().toPlainTextString();
}
System.out.println("C="+C+" D="+rd+" E="+E+" F="+F+" G="+G+" H="+H+" I="+I+" J="+J+" AQ="+AQ+" AR="+AR+" AS="+AS+" AT="+AT+" AU="+AU+" AV="+AV
+" AK="+AK+" AM="+AM+" AN="+AN+" AO="+AO+" AW="+AW);
parser.setInputHTML(html);
NodeList nlList = parser.extractAllNodesThatMatch(ListTableFilter);
if (nlList.size() > 0) {
NodeList Rows = null;
String ListHtml = nlList.elementAt(0).toHtml();
parser.setInputHTML(ListHtml);
Rows = parser.extractAllNodesThatMatch(TrFilter);
String Y = null;
for (int i = 1; i < Rows.size(); i++) {
String L = null;
String N = null;
String O = null;
String Q = null;
String U = null;
String V = null;
String W = null;
String X = null;
String AE = null;
String AJ = null;
String AX = null;
Node node = Rows.elementAt(i);
String RowHtml = node.toHtml();
parser.setInputHTML(RowHtml);
try {
NodeList Cells = parser.extractAllNodesThatMatch(TdFilter);
for (int j=0;j<Cells.size();j++) {
//System.out.println(j + ": " + Cells.elementAt(j).toPlainTextString());
if (Cells.size() > 5) Q = Cells.elementAt(5).toPlainTextString();
if (Cells.size() > 0) W = Cells.elementAt(0).toPlainTextString();
if (Cells.size() > 3) U = client.toChinese(Cells.elementAt(3).toPlainTextString());
if (Cells.size() > 4) V = client.toChinese(Cells.elementAt(4).toPlainTextString());
if (Cells.size() > 9) X = Cells.elementAt(9).toPlainTextString();
if (i == 1) Y = X;
if (Cells.size() > 7) N = Cells.elementAt(7).toPlainTextString();
if (Cells.size() > 8) {
AE = Cells.elementAt(8).toPlainTextString().replaceAll(" ", "").trim();
if ("-".equals(AE)) {
AE = "0";
} else if ("短馬頭位".equals(AE)) {
AE = "0.1";
} else if ("一頭位".equals(AE)) {
AE = "0.2";
} else if ("頸位".equals(AE)) {
AE = "0.5";
} else if ("多個馬位".equals(AE)) {
AE = "99";
}
}
if (Cells.size() > 10) AJ = Cells.elementAt(10).toPlainTextString();
if (Cells.size() > 6) AX = Cells.elementAt(6).toPlainTextString();
if (Cells.size() > 2) {
O = Cells.elementAt(2).toPlainTextString();
int beginIndex = O.indexOf("(");
if (beginIndex > -1)
L = client.toChinese(O.substring(0, beginIndex));
int endIndex = O.indexOf(")");
if (beginIndex > -1 && endIndex > -1)
O = O.substring(beginIndex+1, endIndex);
}
System.out.println(" L="+L+" N="+N+" O="+O+" U="+U+" V="+V+"W="+W+" X="+X+" Y="+Y+" AE="+AE+" AJ="+AJ+" AX="+AX);
}
} catch (ParserException e) {
e.printStackTrace();
}
List<String> cells = new ArrayList<String>();
for (int m=0;m<50;m++) cells.add(null);
cells.set(2, C);
cells.set(3, D);
cells.set(4, E);
cells.set(5, F);
cells.set(6, G);
cells.set(7, H);
cells.set(8, I);
cells.set(9, J);
cells.set(10, K);
cells.set(11, L);
cells.set(13, N);
cells.set(14, O);
cells.set(16, Q);
cells.set(20, U);
cells.set(21, V);
cells.set(22, W);
cells.set(23, X);
cells.set(24, Y);
cells.set(30, AE);
cells.set(35, AJ);
cells.set(36, AK);
cells.set(38, AM);
cells.set(39, AN);
cells.set(40, AO);
cells.set(42, AQ);
cells.set(43, AR);
cells.set(44, AS);
cells.set(45, AT);
cells.set(46, AU);
cells.set(47, AV);
cells.set(48, AW);
cells.set(49, AX);
rows.add(cells);
}
}
}
// If sectional times and positions are available, read them
String secHtml = client.getHTML("http://www.hkjc.com/chinese/racing/display_sectionaltime.asp?RaceDate="+racedate+"&Raceno="+(n+1));
// System.out.println(srcHtml);
if (secHtml == null) continue;
parser.setInputHTML(secHtml);
NodeList nlSecTable = parser.extractAllNodesThatMatch(SecTableFilter);
// for (int i=0;i<nlSecTable.size();i++) {
// System.out.println(i + ": " + nlSecTable.elementAt(i).toHtml());
// }
if (nlSecTable.size() > 0) {
TableTag table = (TableTag)nlSecTable.elementAt(0);
List<TableRow> Rows = new ArrayList<TableRow>();
for (int i=0;i<table.getChildren().size();i++) {
Node child = table.getChildren().elementAt(i);
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -