// 📄 main.java
// 字号:
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.List;
import java.util.Random;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small scraper that walks a paginated listing, collects the relative links
 * to PDF files found on every page and then downloads each PDF to disk.
 */
public class Main {
    /** Base address of the paginated listing; the page number is appended to it. */
    private static final String LIST_PAGE_URL = "http://www.cvh.org.cn/difangzhi/qinling/list.asp?pgno=";

    /** Base address that the extracted relative PDF links are appended to. */
    private static final String PDF_BASE_URL = "http://www.cvh.org.cn/difangzhi/qinling/";

    /** Prefix prepended to every file name to build the local save path. */
    private static final String SAVE_PREFIX = "C:\\";

    /** Total number of listing pages; pages are numbered starting at 1. */
    private static final int PAGE_COUNT = 153;

    /** Size of the chunk used when draining a network stream. */
    private static final int BUFFER_SIZE = 8192;

    /** Maximum time to wait for a connection to be established, in milliseconds. */
    private static final int CONNECT_TIMEOUT_MS = 30000;

    /** Maximum time to wait for a single read to deliver data, in milliseconds. */
    private static final int READ_TIMEOUT_MS = 60000;

    /**
     * Matches one relative PDF link such as {@code page/1(5)/003.pdf}.
     * The match is reluctant and stops at quotes and angle brackets so that
     * two links on the same line are never merged into a single match.
     */
    private static final Pattern PDF_LINK_PATTERN =
            Pattern.compile("page/[^\"'<>\\r\\n]+?\\.pdf", Pattern.CASE_INSENSITIVE);

    /** Source of random digits for fallback file names. */
    private static final Random RANDOM = new Random();

    /** Every PDF link extracted so far, in the order it was found. */
    private static final List<String> pdfURLs = new ArrayList<String>();

    /**
     * Program entry point: runs the scraper and prints how many links were collected.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
        Main m = new Main();
        m.run();
        System.out.println("总的文件个数:" + pdfURLs.size());
    }

    /**
     * Runs the whole job: extracts the PDF links from every listing page and
     * then downloads each collected PDF, pausing between requests.
     * If the thread is interrupted while pausing, the job stops early and the
     * interrupt flag is left set for the caller.
     */
    public void run() {
        if (!pause(2000)) {
            return;
        }
        System.out.println("开始了");
        // Pages are numbered 1..PAGE_COUNT inclusive, so the last page must be visited too.
        for (int page = 1; page <= PAGE_COUNT; page++) {
            String targetUrl = LIST_PAGE_URL + page;
            System.out.println("正在提取目标网址...\n " + targetUrl);
            this.getPDFURL(this.getContentByURL(targetUrl));
            System.out.println(targetUrl + "\n===提取完毕===");
            if (!pause(1000)) {
                return;
            }
        }
        System.out.println("\n\n\n\n===网址全部提取完毕===\n\n\n\n");
        System.out.println("===正在下载并且保持文件===\n\n\n\n");
        for (String link : pdfURLs) {
            this.saveFile(PDF_BASE_URL + link, SAVE_PREFIX + this.getFileName(link));
            if (!pause(3000)) {
                return;
            }
        }
    }

    /**
     * Sleeps for the given time.
     *
     * @param millis how long to sleep, in milliseconds
     * @return {@code true} if the sleep completed, {@code false} if the thread
     *         was interrupted (the interrupt flag is restored in that case)
     */
    private static boolean pause(long millis) {
        try {
            Thread.sleep(millis);
            return true;
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /**
     * Derives the local file name from a link: the text after the last {@code '/'}.
     * When the link has no slash, or nothing follows the last slash, a fallback
     * name made of {@code 'R'} plus four random digits (five characters) is returned.
     *
     * NOTE(review): only the last path segment is used, so links from different
     * directories that share a file name map to the same local path - confirm
     * that overwriting is acceptable.
     *
     * @param s the link to take the file name from
     * @return the file name, or the random five-character fallback
     */
    private String getFileName(String s) {
        int slash = s.lastIndexOf('/');
        if (slash >= 0 && slash < s.length() - 1) {
            String name = s.substring(slash + 1);
            System.out.println("名称是:" + slash + "\n====" + name);
            return name;
        }
        // nextInt(10000) is never negative and %04d pads it, so the result is always 5 chars.
        return "R" + String.format("%04d", RANDOM.nextInt(10000));
    }

    /**
     * Extracts every PDF link (for example {@code page/1(5)/003.pdf}) from the
     * given HTML and appends the links to the shared list of collected links.
     *
     * @param content HTML source to scan; {@code null} is treated as empty
     * @return the shared list holding all links collected so far, including
     *         those found by earlier calls
     */
    public List<String> getPDFURL(String content) {
        System.out.println("match");
        if (content == null) {
            return pdfURLs;
        }
        Matcher matcher = PDF_LINK_PATTERN.matcher(content);
        while (matcher.find()) {
            String match = matcher.group();
            pdfURLs.add(match);
            System.out.println(match);
        }
        return pdfURLs;
    }

    /**
     * Downloads the resource at the given address and returns it as text.
     *
     * @param fileWithURL address of the page to fetch
     * @return the page source, or an empty string when the fetch fails; a
     *         response shorter than its declared length is reported on the
     *         console but the part that was received is still returned
     */
    public String getContentByURL(String fileWithURL) {
        try {
            URLConnection conn = new URL(fileWithURL).openConnection();
            byte[] data = readAll(conn);
            if (isTruncated(conn, data)) {
                System.out.println("download error");
            }
            System.out.println("Get HTML SRC OK: ");
            // Decoded with the platform charset, as before; the extracted links
            // are matched on plain ASCII path text.
            return new String(data);
        } catch (Exception e) {
            // Boundary catch: one unreadable page must not abort the whole run,
            // and must not hand back the content of a previously fetched page.
            System.out.println("Get HTML SRC FAILED: " + fileWithURL);
            e.printStackTrace();
            return "";
        }
    }

    /**
     * Downloads the resource at {@code fileurl} and writes it to {@code savepath}.
     * Nothing is written when the download fails or is shorter than the
     * length declared by the server.
     *
     * @param fileurl address of the file to download
     * @param savepath local path the file is written to
     * @return {@code true} if the file was downloaded completely and saved,
     *         {@code false} otherwise
     */
    public boolean saveFile(String fileurl, String savepath) {
        File savefile = new File(savepath);
        try {
            URLConnection conn = new URL(fileurl).openConnection();
            byte[] data = readAll(conn);
            if (isTruncated(conn, data)) {
                System.out.println("download error");
                return false;
            }
            FileOutputStream fos = new FileOutputStream(savefile);
            try {
                fos.write(data);
            } finally {
                fos.close();
            }
        } catch (Exception e) {
            // Boundary catch: a single failed download is reported, not fatal.
            System.out.println("File Save FAILED: " + savefile);
            e.printStackTrace();
            return false;
        }
        System.out.println("File Save OK: " + savefile);
        return true;
    }

    /**
     * Reads the complete body of a connection into memory. Works whether or
     * not the server declares a content length, and always closes the stream.
     *
     * @param conn connection to read from; timeouts are applied before connecting
     * @return every byte delivered before end of stream
     * @throws IOException if connecting or reading fails
     */
    private static byte[] readAll(URLConnection conn) throws IOException {
        conn.setConnectTimeout(CONNECT_TIMEOUT_MS);
        conn.setReadTimeout(READ_TIMEOUT_MS);
        InputStream in = conn.getInputStream();
        try {
            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
            byte[] chunk = new byte[BUFFER_SIZE];
            int count;
            while ((count = in.read(chunk)) != -1) {
                buffer.write(chunk, 0, count);
            }
            return buffer.toByteArray();
        } finally {
            in.close();
        }
    }

    /**
     * Tells whether fewer bytes were received than the server declared.
     *
     * @param conn connection the data was read from
     * @param data bytes that were actually received
     * @return {@code true} only when a length was declared and not reached
     */
    private static boolean isTruncated(URLConnection conn, byte[] data) {
        int expected = conn.getContentLength(); // -1 when the length is unknown
        return expected >= 0 && data.length < expected;
    }
}
// ⌨️ 快捷键说明
// 复制代码
// Ctrl + C
// 搜索代码
// Ctrl + F
// 全屏模式
// F11
// 切换主题
// Ctrl + Shift + D
// 显示快捷键
// ?
// 增大字号
// Ctrl + =
// 减小字号
// Ctrl + -