📄 chtml_trans.java
字号:
strPort = getValue(configstr, "proxyPort");
}
Properties sys = System.getProperties();
sys.put("proxySet", "true");
sys.put("proxyHost", strProxy);
sys.put("proxyPort", strPort);
System.setProperties(sys);*/
java.net.HttpURLConnection l_connection = (java.net.HttpURLConnection) l_url
.openConnection();
l_connection.setRequestProperty("user-agent", ua);
l_connection.setRequestProperty("x-up-calling-line-id", phoneNo);
// l_connection.setRequestProperty("accept-charset", "GB2312");
l_connection.connect();
l_urlStream = l_connection.getInputStream();
java.io.BufferedReader l_reader = new java.io.BufferedReader(
new java.io.InputStreamReader(l_urlStream, "8859_1"));
while ((sCurrentLine = l_reader.readLine()) != null) {
// System.out.println("");
// System.out.println(sCurrentLine);
sTotalString += sCurrentLine;
}
// 用找到的字符集,来新建输入流
Pattern p = Pattern.compile("charset[ \t\n]*=[ \t\n]*([^>\"$|^>/$]+)",Pattern.CASE_INSENSITIVE);
Matcher m = p.matcher(sTotalString);
if (m.find()) {
// System.out.print(m.group()+"================================");
String charset = m.group().trim().substring(8);
ByteArrayOutputStream baos = new ByteArrayOutputStream();
byte[] data = baos.toByteArray();
data = sTotalString.getBytes("8859_1");
String result = new String(data, charset);
return result;
}
else{
ByteArrayOutputStream baos = new ByteArrayOutputStream();
byte[] data = baos.toByteArray();
data = sTotalString.getBytes("8859_1");
String result = new String(data, "GB2312");
return result;
}
} catch (java.io.FileNotFoundException e) {
e.printStackTrace();
System.out.println(e.getMessage());
System.out.println("找不到网页:");
System.out.println("请返回");
return "错误:找不到网页,请返回!";
} catch (Exception e) {
e.printStackTrace();
return e.getMessage();
}
}
/**
 * Inserts a line break after each closing table-row tag, then strips the table
 * markup by delegating to {@code rmTable}.
 *
 * Only the exact spellings {@code </tr>} and {@code </TR>} are matched; the
 * former is followed by {@code <br>}, the latter by {@code <BR>}.
 *
 * @param inputHtml HTML text to process
 * @return whatever {@code rmTable} returns for the text with the breaks inserted
 * @throws Exception declared for callers; anything thrown by {@code rmTable} propagates
 */
public static String dealTR(String inputHtml) throws Exception {
    // replaceAll leaves the text untouched when there is no match, so no
    // separate "does it contain the tag" probe is required.
    String withLowerBreaks = inputHtml.replaceAll("</tr>", "</tr><br>");
    String withAllBreaks = withLowerBreaks.replaceAll("</TR>", "</TR><BR>");
    return rmTable(withAllBreaks);
}
/**
 * Replaces each {@code <iframe ... src="URL" ...>...</iframe>} element with the
 * text returned by {@code getHtml_by_url(ua, phoneNo, URL)}.
 *
 * A URL that does not start with "http" is prefixed with {@code host_site}
 * (inserting a "/" when the URL does not already start with one). Text inserted
 * for one iframe is scanned again, so iframes inside fetched content are inlined
 * as well.
 *
 * An iframe is skipped (left as is) when its opening tag has no {@code src}
 * before the tag's closing ">", when the value is not enclosed in double quotes
 * inside the element, or when no {@code </iframe>} follows. Matching of
 * {@code <iframe}, {@code src} and {@code </iframe>} is case-sensitive.
 *
 * @param ua        passed through to {@code getHtml_by_url}
 * @param phoneNo   passed through to {@code getHtml_by_url}
 * @param inputHtml HTML text to process
 * @return the text with the usable iframes replaced; if any exception occurs,
 *         the text as it stood at that moment
 * @throws Exception declared for callers; exceptions raised inside are caught and printed
 */
public static String dealFrame(String ua,String phoneNo,String inputHtml) throws Exception {
    final String openMarker = "<iframe";
    final String closeMarker = "</iframe>";
    try {
        int searchFrom = 0;
        int frameStart;
        while ((frameStart = inputHtml.indexOf(openMarker, searchFrom)) != -1) {
            int tagEnd = inputHtml.indexOf(">", frameStart);
            int frameEnd = inputHtml.indexOf(closeMarker, frameStart);
            // String.indexOf treats a negative start offset as 0, so every
            // dependent lookup is guarded to avoid silently restarting the
            // search at the beginning of the document.
            int srcPos = inputHtml.indexOf("src", frameStart);
            int quoteOpen = (srcPos == -1) ? -1 : inputHtml.indexOf("\"", srcPos);
            int quoteClose = (quoteOpen == -1) ? -1 : inputHtml.indexOf("\"", quoteOpen + 1);

            boolean usable = tagEnd != -1 && frameEnd != -1
                    && srcPos != -1 && srcPos < tagEnd       // src belongs to this opening tag
                    && quoteClose != -1 && quoteClose < frameEnd; // quoted value lies inside this element
            if (!usable) {
                System.out.println("iframe skipped (no usable src or no closing tag) at offset " + frameStart);
                searchFrom = frameStart + openMarker.length();
                continue;
            }

            String frameSrc = inputHtml.substring(quoteOpen + 1, quoteClose);
            System.out.println("iframe_src:" + frameSrc);
            if (!frameSrc.startsWith("http")) {
                // NOTE(review): dealImg prepends "http://" to host_site but this
                // method does not — confirm which form host_site actually has.
                if (frameSrc.startsWith("/")) {
                    frameSrc = host_site + frameSrc;
                } else {
                    frameSrc = host_site + "/" + frameSrc;
                }
            }

            String frameContent = new String(getHtml_by_url(ua,phoneNo,frameSrc));
            String wholeFrame = inputHtml.substring(frameStart,
                    frameEnd + closeMarker.length());
            // Literal replacement; identical iframe elements elsewhere in the
            // text are replaced in the same step.
            inputHtml = inputHtml.replace(wholeFrame, frameContent);

            // Rescan from the insertion point so iframes in the fetched text are handled too.
            // NOTE(review): a page whose iframe (transitively) includes itself would never terminate.
            searchFrom = frameStart;
        }
        return inputHtml;
    } catch (Exception ex) {
        System.out.println(ex);
        return inputHtml;
    }
}
/**
 * Rewrites relative image URLs in the given HTML to absolute ones.
 *
 * The text is tokenized with {@code Lexer}; the {@code src} value of every
 * {@code img} tag that does not start with "http" is collected once. Each
 * collected value is then replaced, wherever it occurs in the text, by
 * "http://" + {@code host_site} + a path derived from the value:
 * a leading "/..", "/.", ".." or "." is dropped, a value starting with "/" is
 * appended as is, and any other value is appended after {@code img_path}.
 *
 * The replacement is literal text substitution, so characters such as
 * ".", "?", "+" or "$" in a URL are not interpreted as regular-expression
 * syntax. Empty {@code src} values are ignored.
 *
 * @param inputHtml HTML text to process
 * @return the rewritten text; if any exception occurs, the text as it stood at that moment
 */
public static String dealImg(String inputHtml) {
    try {
        Lexer lexer = new Lexer(inputHtml);
        // Distinct relative src values, in order of first appearance.
        Vector<String> relativeSrcs = new Vector<String>();
        Node node;
        while (null != (node = lexer.nextNode())) {
            if (!(node instanceof TagNode)) {
                continue;
            }
            TagNode tag = (TagNode) node;
            if (!tag.getTagName().equalsIgnoreCase("img")) {
                continue;
            }
            String imgSrc = tag.getAttribute("src");
            // An empty value must be skipped: replacing "" would insert the
            // prefix between every character of the document.
            if (imgSrc == null || imgSrc.length() == 0 || imgSrc.startsWith("http")) {
                continue;
            }
            if (!relativeSrcs.contains(imgSrc)) {
                relativeSrcs.add(imgSrc);
            }
        }

        for (String src : relativeSrcs) {
            String absolute;
            // NOTE(review): in the dot-prefixed branches the remainder usually
            // still starts with "/", so the result contains "//" after the host.
            if (src.startsWith("/..")) {
                absolute = "http://" + host_site + "/" + src.substring(3);
            } else if (src.startsWith("/.")) {
                absolute = "http://" + host_site + "/" + src.substring(2);
            } else if (src.startsWith("/")) {
                absolute = "http://" + host_site + src;
            } else if (src.startsWith("..")) {
                absolute = "http://" + host_site + "/" + src.substring(2);
            } else if (src.startsWith(".")) {
                absolute = "http://" + host_site + "/" + src.substring(1);
            } else {
                absolute = "http://" + host_site + img_path + src;
            }
            // Literal replace (not replaceAll): the src is data, not a pattern.
            // NOTE(review): every occurrence of the src text is replaced, not only
            // the one inside the img tag; a src that is a substring of another
            // URL in the document is rewritten there too.
            inputHtml = inputHtml.replace(src, absolute);
        }
        return inputHtml;
    } catch (Exception e) {
        e.printStackTrace();
        return inputHtml;
    }
}
/**
 * Removes the opening and closing {@code table}, {@code td} and {@code tr} tags
 * from the given HTML, keeping everything between them.
 *
 * Tag names are matched case-insensitively and as whole names, so
 * {@code <Table>} is removed while {@code <track>} is left alone. Opening tags
 * may carry attributes and may have whitespace after "&lt;"; closing tags may
 * have whitespace around the name. {@code div} tags are not touched.
 *
 * @param inputHtml HTML text to process
 * @return the text without table, td and tr tags
 * @throws Exception declared for callers; the body itself only performs regex replacement
 */
public static String rmTable(String inputHtml) throws Exception {
    // Same whitespace set the original patterns used: space, FF, LF, CR, TAB.
    final String ws = "[ \\f\\n\\r\\t]*";
    final String[] tagNames = { "table", "td", "tr" };

    // Opening tags: "<", optional whitespace, the name, then anything up to ">".
    // \b keeps "tr" from matching the start of a longer tag name.
    for (String tagName : tagNames) {
        inputHtml = inputHtml.replaceAll("(?i)<" + ws + tagName + "\\b[^>]*>", "");
    }
    // Closing tags.
    for (String tagName : tagNames) {
        inputHtml = inputHtml.replaceAll("(?i)</" + ws + tagName + ws + ">", "");
    }
    return inputHtml;
}
/**
 * Reads the file {@code config.txt} located in the directory that
 * {@code chtml_trans.class.getResource("/")} resolves to and returns its
 * content as a single string.
 *
 * The file is decoded as GB2312. Lines are concatenated without line
 * separators. The underlying stream is closed before returning, also when
 * reading fails.
 *
 * @return the concatenated lines of the file
 * @throws IOException if the file cannot be opened or read
 */
public static String getConfigFile() throws IOException {
    // NOTE(review): URL.getPath() is not URL-decoded; a directory containing
    // spaces or non-ASCII characters may not resolve — confirm deployment paths.
    String filepath = chtml_trans.class.getResource("/").getPath();
    File file = new File(filepath + "config.txt");

    FileInputStream in = new FileInputStream(file);
    try {
        BufferedReader reader = new BufferedReader(
                new InputStreamReader(in, "GB2312"));
        StringBuilder content = new StringBuilder();
        String line;
        while ((line = reader.readLine()) != null) {
            content.append(line);
        }
        return content.toString();
    } finally {
        // Closing the file stream releases the handle held by the readers wrapping it.
        in.close();
    }
}
/**
 * Extracts the text between {@code <varName>} and the first following
 * {@code </varName>} in the given string.
 *
 * @param xmlString text to search; {@code null} is treated as containing nothing
 * @param varName   element name to look for (used verbatim to build the two tags)
 * @return the enclosed text, or an empty string when the opening tag is absent
 *         or no closing tag follows it
 */
public static String getValue(String xmlString, String varName) {
    if (xmlString == null) {
        return "";
    }
    String openTag = "<" + varName + ">";
    String closeTag = "</" + varName + ">";

    // Test the raw index before adding the tag length; otherwise "not found"
    // (-1) turns into a positive offset and can no longer be detected.
    int openAt = xmlString.indexOf(openTag);
    if (openAt == -1) {
        return "";
    }
    int begin = openAt + openTag.length();

    // Look for the closing tag only after the opening one so that end >= begin.
    int end = xmlString.indexOf(closeTag, begin);
    if (end == -1) {
        return "";
    }
    return xmlString.substring(begin, end);
}
/**
 * Handles a POST request by forwarding it unchanged to {@code doGet}.
 *
 * @param request  the incoming request, passed on as is
 * @param response the response object, passed on as is
 * @throws ServletException propagated from {@code doGet}
 * @throws IOException      propagated from {@code doGet}
 */
public void doPost(HttpServletRequest request, HttpServletResponse response)
        throws ServletException, IOException {
    this.doGet(request, response);
}
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -