📄 writer.java
字号:
package gather;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.nio.charset.IllegalCharsetNameException;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnsupportedCharsetException;
import java.util.Locale;
public class Writer
{
// Size thresholds in bytes. writeData compares the downloaded length against
// FL_100K, FL_500K and FL_1M to pick the sub-directory a file is saved into.
// FL_50K and FL_5M are not referenced in the visible part of this file.
public final int FL_50K = 50 * 1024;
public final int FL_100K = 100 * 1024;
public final int FL_500K = 500 * 1024;
public final int FL_1M = 1024 * 1024;
public final int FL_5M = 5 * 1024 * 1024;
// Not referenced in the visible part of this file.
int rlength = 0;
// Number of bytes of the current download held in buf (reset and updated by writeData).
int nlength = 0;
// Number of entries of urlPath filled by the last parseURLPath call made from writeData.
private int nUrlPath = 0;
// urlPath[0] is "http://<host>/"; the following entries are the directory
// segments of the URL path, each ending in "/". Fixed capacity of 100 entries.
private String[] urlPath = new String[100];
// Counter shared by all Writer instances; savetofile formats it as the
// 8-digit hexadecimal file name and increments it on every call.
static long nSaveFileCount = 0;
// Presumably a sleep interval of one minute in milliseconds - not used in the
// visible part of this file, TODO confirm against the rest of the class.
private final int nSLEEPTIME = 1 * 60000;
// Accumulated bytes of the current download.
byte[] buf = new byte[10240];
// Chunk buffer passed to InputStream.read in writeData.
byte[] inbuf = new byte[10240];
// Scratch copy of buf used by writeData while it grows buf.
byte[] tembuf = new byte[10240];
// No-argument constructor; performs no work beyond the field initialisers above.
public Writer()
{
}
/**
 * Copies {@code length} bytes from {@code src} (starting at {@code srcpos}) into
 * {@code dest} (starting at {@code destpos}).
 *
 * <p>Delegates to {@link System#arraycopy}, so the copy is correct even when
 * {@code src} and {@code dest} are the same array and the two ranges overlap.
 * A {@code length} of zero or less copies nothing and never touches the arrays.
 *
 * @param src     source array
 * @param srcpos  index of the first byte to read from {@code src}
 * @param dest    destination array
 * @param destpos index of the first byte to write in {@code dest}
 * @param length  number of bytes to copy; non-positive values are a no-op
 * @throws IndexOutOfBoundsException if a positive {@code length} does not fit in
 *         either array; unlike a byte-by-byte loop, nothing is copied in that case
 * @throws NullPointerException if {@code length} is positive and either array is null
 */
public void memcpy(byte[] src, int srcpos, byte[] dest, int destpos, int length)
{
    if (length <= 0)
    {
        // Keep the historical "nothing to do" behaviour: System.arraycopy would
        // reject a negative length and null-check the arrays even for length 0.
        return;
    }
    System.arraycopy(src, srcpos, dest, destpos, length);
}
/**
 * Looks for the first {@code charset=} marker in {@code buf} (an HTTP header
 * value or the raw bytes of an HTML page) and returns the charset name that
 * follows it.
 *
 * <p>The bytes are decoded as ISO-8859-1 so that every byte maps to exactly one
 * character; the marker and charset names are plain ASCII, and this keeps string
 * indices valid whatever the real encoding of the page is. The name may be
 * wrapped in a single or double quote (as in {@code <meta charset="utf-8">}), is
 * at most 30 characters long and ends at the first quote, slash, {@code >},
 * space, CR, LF, comma or semicolon.
 *
 * @param buf bytes to scan; may be {@code null} or empty
 * @return the lower-cased charset name if one is found and this JVM supports it,
 *         otherwise the empty string (never {@code null})
 */
public String findCharset(byte[] buf)
{
    if (buf == null || buf.length <= 0)
    {
        return "";
    }
    final String marker = "charset=";
    final String terminators = "\"'/> \r\n,;";
    final int maxNameLength = 30;

    String text = new String(buf, StandardCharsets.ISO_8859_1).toLowerCase(Locale.ROOT);
    int markerPos = text.indexOf(marker);
    if (markerPos < 0)
    {
        return "";
    }

    int start = markerPos + marker.length();
    // Step over an opening quote so that charset="utf-8" is recognised too.
    if (start < text.length() && (text.charAt(start) == '"' || text.charAt(start) == '\''))
    {
        start++;
    }
    int limit = Math.min(start + maxNameLength, text.length());
    int end = start;
    while (end < limit && terminators.indexOf(text.charAt(end)) < 0)
    {
        end++;
    }

    String name = text.substring(start, end);
    if (name.isEmpty())
    {
        return "";
    }
    try
    {
        // Only used as a validity check: throws for illegal or unsupported names.
        Charset.forName(name);
    }
    catch (IllegalArgumentException e)
    {
        // Covers both IllegalCharsetNameException and UnsupportedCharsetException;
        // callers fall back to their default charset when "" is returned.
        return "";
    }
    return name;
}
/**
 * Writes {@code buf} to a new file named {@code <8 hex digits>.<strFileExt>}
 * inside {@code strPath}, creating the directory if necessary.
 *
 * <p>The hexadecimal part is the current value of the shared counter
 * {@code nSaveFileCount}, which is incremented on every call. Failures are
 * reported with a stack trace and otherwise ignored (best effort).
 *
 * @param strPath    target directory; it is concatenated directly with the file
 *                   name, so it must end with a path separator
 * @param strFileExt file extension taken from the end of the link, without the
 *                   leading "."
 * @param buf        bytes to write
 */
public void savetofile(String strPath, String strFileExt, byte[] buf)
{
    try
    {
        new File(strPath).mkdirs();
        // Format only the counter: the path and extension come from URLs and may
        // contain '%', which must not be interpreted as a format specifier.
        String fileName = strPath + String.format("%08X.", nSaveFileCount++) + strFileExt;
        // try-with-resources closes the stream even when write() fails.
        try (FileOutputStream fos = new FileOutputStream(fileName))
        {
            fos.write(buf);
        }
    }
    catch (Exception e)
    {
        e.printStackTrace();
    }
}
/**
 * Downloads the body of {@code con} into {@code buf} / {@code nlength} and then
 * either saves it to disk or scans it for further links.
 *
 * <p>When {@code bSaveToFile} is true the data is written below
 * {@code D:\temp\<host>\} in a sub-directory chosen by size: {@code FL_500K\}
 * for 100K..500K, {@code FL_1M\} for 500K..1M, {@code FL_Over1M\} for 1M and
 * above. Smaller downloads are not saved. Otherwise the body is decoded with the
 * charset announced in the HTTP headers, or in the page itself, or "gbk" as the
 * last resort, and passed to the link extractors.
 *
 * <p>All exceptions are caught and printed; nothing is rethrown.
 *
 * @param con         an opened connection; must be an {@link HttpURLConnection}
 * @param strurl      the URL that {@code con} was opened for
 * @param bSaveToFile true to store the body on disk, false to analyse it as HTML
 * @return always 0 (the counter is never updated)
 * @throws Exception declared for compatibility with existing callers
 */
public int writeData(URLConnection con, String strurl, boolean bSaveToFile) throws Exception
{
    int n = 0;
    try
    {
        nlength = 0;
        nUrlPath = parseURLPath(strurl); // split the base URL into its path segments
        String strHost = new URL(strurl).getHost();
        if (strHost == null || strHost.length() == 0)
        {
            strHost = "noname";
        }
        String strWritePath = "D:\\temp\\" + strHost + "\\";

        // Non-HTTP connections are rejected here (ClassCastException, reported below).
        HttpURLConnection httpurlconn = (HttpURLConnection) con;
        String strcharset = "";
        if (!bSaveToFile)
        {
            // Only the HTML analysis needs a charset; look in the HTTP headers first.
            strcharset = findHeaderCharset(httpurlconn);
        }

        buf = readBody(con);
        nlength = buf.length;

        if (bSaveToFile)
        {
            String strSubPath = "";
            if (nlength >= 5120 && nlength < FL_100K)
            {
                // NOTE(review): the "FL_100K\\" bucket was commented out in the
                // original, so downloads of this size are silently not saved.
            }
            else if (nlength >= FL_100K && nlength < FL_500K)
            {
                strSubPath = "FL_500K\\";
            }
            else if (nlength >= FL_500K && nlength < FL_1M)
            {
                strSubPath = "FL_1M\\";
            }
            else if (nlength >= FL_1M)
            {
                strSubPath = "FL_Over1M\\";
            }
            else
            {
                System.out.println("------------------------>该数据长度小于指定值,丢弃。" + nlength);
            }
            if (!strSubPath.isEmpty())
            {
                // Last four characters of the link: ".jpg" or "jpeg". savetofile adds
                // the dot itself, so drop a leading one to avoid "NNNNNNNN..jpg".
                String strFileExt = strurl.substring(strurl.length() - 4);
                if (strFileExt.startsWith("."))
                {
                    strFileExt = strFileExt.substring(1);
                }
                savetofile(strWritePath + strSubPath, strFileExt, buf);
            }
        }
        else
        {
            if (strcharset.isEmpty())
            {
                // Nothing in the headers: look for "charset=" inside the page itself.
                strcharset = findCharset(buf);
            }
            if (strcharset.isEmpty())
            {
                // Neither source named a charset: fall back to the default.
                strcharset = "gbk";
            }
            String strhtml = new String(buf, 0, nlength, strcharset);
            // Collect the links this page refers to.
            getIMGLinkText(strhtml);
            getBackgroundLinkText(strhtml);
            getOtherUrlLink(strhtml, strcharset);
        }
    }
    catch (Exception e)
    {
        System.out.println("Error:_Writer::" + e);
    }
    return n;
}

/** Returns the first supported charset named in the response headers of {@code con}, or "". */
private String findHeaderCharset(URLConnection con)
{
    String strHeaderField;
    for (int i = 0; (strHeaderField = con.getHeaderField(i)) != null; i++)
    {
        if (strHeaderField.toLowerCase(Locale.ROOT).contains("charset="))
        {
            String found = findCharset(strHeaderField.getBytes(StandardCharsets.ISO_8859_1));
            if (!found.isEmpty())
            {
                return found;
            }
        }
    }
    return "";
}

/** Reads the whole body of {@code con}, always closing the stream so the connection is released. */
private byte[] readBody(URLConnection con) throws IOException
{
    ByteArrayOutputStream collected = new ByteArrayOutputStream();
    try (InputStream in = con.getInputStream())
    {
        int len;
        while ((len = in.read(inbuf)) > 0)
        {
            collected.write(inbuf, 0, len);
        }
    }
    return collected.toByteArray();
}
/**
 * Splits {@code strurl} into a base URL and its directory segments and stores
 * them in {@code urlPath}: entry 0 is {@code "http://<host>/"}, each following
 * entry is one directory of the path including its trailing "/". The file name
 * after the last "/" is not stored.
 *
 * <p>The URL is lower-cased before parsing. At most {@code urlPath.length}
 * entries are stored; deeper directories are ignored instead of overflowing the
 * array.
 *
 * <p>NOTE(review): the base is always built with "http://" and without a port,
 * whatever the URL's own scheme and port are - confirm against the code that
 * consumes {@code urlPath} before changing that.
 *
 * @param strurl absolute URL to split; may be {@code null} or empty
 * @return the number of entries written to {@code urlPath}, or 0 if
 *         {@code strurl} is null, empty or cannot be parsed
 */
public int parseURLPath(String strurl)
{
    if ((null == strurl) || (strurl.length() == 0))
    {
        return 0;
    }
    // Locale.ROOT keeps the result independent of the JVM's default locale.
    strurl = strurl.toLowerCase(Locale.ROOT);
    int nCount = 0;
    try
    {
        URL url = new URL(strurl);
        urlPath[nCount++] = "http://" + url.getHost() + "/";
        String strpath = url.getPath();
        if (strpath.length() > 0)
        {
            strpath = strpath.substring(1); // drop the leading "/"
        }
        int slash;
        while (nCount < urlPath.length && (slash = strpath.indexOf('/')) >= 0)
        {
            urlPath[nCount++] = strpath.substring(0, slash + 1);
            strpath = strpath.substring(slash + 1);
        }
        return nCount;
    }
    catch (Exception e)
    {
        System.out.println("Error:_HTML_parseURLPath:" + strurl + e.getMessage());
        return 0;
    }
}
/**
 * Scans {@code strHtml} for {@code <img src=...>} tags and queues the images
 * they point to.
 *
 * <p>The page is lower-cased first. For every occurrence of {@code <img src=}
 * the value up to the next quote, {@code >}, space, CR, LF, comma or semicolon
 * is passed through {@code completeURL}. Links ending in .jpg, .bmp or .jpeg
 * that are not yet in {@code controler.IMGList} are added to both
 * {@code controler.IMGList} and {@code controler.MissionList}.
 *
 * <p>Any exception stops the scan and is printed to standard output.
 *
 * @param strHtml decoded HTML text; {@code null} or empty input is ignored
 */
public void getIMGLinkText(String strHtml)
{
    if (strHtml == null || strHtml.isEmpty())
    {
        return;
    }
    final String marker = "<img src=";
    final String terminators = "\"'> \r\n,;";
    try
    {
        final String html = strHtml.toLowerCase();
        int searchFrom = 0;
        int hit = html.indexOf(marker, searchFrom);
        while (hit != -1)
        {
            // The attribute value starts right after the marker, or one character
            // later when it is quoted.
            int begin = hit + marker.length();
            char opener = html.charAt(begin);
            if (opener == '"' || opener == '\'')
            {
                begin++;
            }

            // Advance to the first terminator (or the end of the page).
            int end = begin;
            while (end < html.length() && terminators.indexOf(html.charAt(end)) < 0)
            {
                end++;
            }
            searchFrom = end;

            String link = completeURL(html.substring(begin, end));
            gather g = new gather();
            g.strurl = link;

            boolean isWantedImage = link.endsWith(".jpg") || link.endsWith(".bmp") || link.endsWith(".jpeg");
            if (isWantedImage && !controler.IMGList.contains(link))
            {
                controler.IMGList.add(link);
                controler.MissionList.add(link);
            }

            hit = html.indexOf(marker, searchFrom);
        }
    }
    catch (Exception e)
    {
        System.out.println("Error:" + e.getMessage());
    }
}
public void getBackgroundLinkText(String strHtml)
{
String line = "";
String strBZ = "background=";
String strmylinktext = "";
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -