⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 chtml_trans.java

📁 chtml转换模块
💻 JAVA
📖 第 1 页 / 共 2 页
字号:
				strPort = getValue(configstr, "proxyPort");
			}
			Properties sys = System.getProperties();
			sys.put("proxySet", "true");
			sys.put("proxyHost", strProxy);
			sys.put("proxyPort", strPort);
			System.setProperties(sys);*/
			java.net.HttpURLConnection l_connection = (java.net.HttpURLConnection) l_url
					.openConnection();
			
			l_connection.setRequestProperty("user-agent", ua);
			l_connection.setRequestProperty("x-up-calling-line-id", phoneNo);
			// l_connection.setRequestProperty("accept-charset", "GB2312");
			l_connection.connect();
			l_urlStream = l_connection.getInputStream();
			java.io.BufferedReader l_reader = new java.io.BufferedReader(
					new java.io.InputStreamReader(l_urlStream, "8859_1"));
			while ((sCurrentLine = l_reader.readLine()) != null) {
//				System.out.println("");
//				System.out.println(sCurrentLine);
				sTotalString += sCurrentLine;
			}
			// 用找到的字符集,来新建输入流
			Pattern p = Pattern.compile("charset[ \t\n]*=[ \t\n]*([^>\"$|^>/$]+)",Pattern.CASE_INSENSITIVE);
			Matcher m = p.matcher(sTotalString);
			if (m.find()) {
//				System.out.print(m.group()+"================================");
				String charset = m.group().trim().substring(8);
				ByteArrayOutputStream baos = new ByteArrayOutputStream();
				byte[] data = baos.toByteArray();
				data = sTotalString.getBytes("8859_1");
				String result = new String(data, charset);
				return result;
			}
			else{
				
				ByteArrayOutputStream baos = new ByteArrayOutputStream();
				byte[] data = baos.toByteArray();
				data = sTotalString.getBytes("8859_1");
				String result = new String(data, "GB2312");
				return result;
			}
			
		} catch (java.io.FileNotFoundException e) {
			e.printStackTrace();
			System.out.println(e.getMessage());
			System.out.println("找不到网页:");
			System.out.println("请返回");
			return "错误:找不到网页,请返回!";
		} catch (Exception e) {
			e.printStackTrace();
			return e.getMessage();
		}

	}

	/*
	 * 对tr结束标志,添加br
	 */

	/**
	 * Inserts a line break after every closing table-row tag and then passes the
	 * markup to {@code rmTable} so the table tags themselves are removed.
	 *
	 * Only the exact spellings {@code </tr>} and {@code </TR>} are handled; the
	 * break tag is written in the same letter case as the row tag it follows.
	 * Closing cell tags get no line break.
	 *
	 * @param inputHtml markup to process
	 * @return the result of {@code rmTable} applied to the markup with the
	 *         line breaks added
	 * @throws Exception declared for callers; propagated from {@code rmTable}
	 */
	public static String dealTR(String inputHtml) throws Exception {
		// A literal replace leaves the text unchanged when the tag is absent,
		// so no separate presence check is required.
		String withBreaks = inputHtml
				.replace("</tr>", "</tr><br>")
				.replace("</TR>", "</TR><BR>");
		return rmTable(withBreaks);
	}

	/*
	 * 把iframe所包含的网址提取出来,用取得的网页内容,替换iframe标签
	 */
	/**
	 * Replaces each {@code <iframe ...>...</iframe>} element with the content
	 * fetched from the URL found in the first double-quoted value after its
	 * {@code src} text.
	 *
	 * A URL that does not start with {@code http} is prefixed with
	 * {@code host_site}, with a {@code /} inserted when the URL does not already
	 * begin with one. The page is fetched through {@code getHtml_by_url}.
	 *
	 * Processing stops at the first iframe that is malformed (no {@code src},
	 * no pair of double quotes, or no closing {@code </iframe>}); markup
	 * inlined up to that point is kept. Any exception is printed and the markup
	 * in its current state is returned.
	 *
	 * NOTE(review): fetched content is scanned again on the next pass, so a page
	 * whose fetched content contains the same iframe would loop indefinitely;
	 * consider a pass limit.
	 *
	 * @param ua user-agent value forwarded to {@code getHtml_by_url}
	 * @param phoneNo phone number value forwarded to {@code getHtml_by_url}
	 * @param inputHtml markup to process; only the lower-case spelling
	 *        {@code <iframe} is recognised
	 * @return the markup with the iframes that could be resolved replaced by
	 *         their fetched content
	 * @throws Exception declared for callers; failures inside the loop are
	 *         caught and logged rather than propagated
	 */
	public static String dealFrame(String ua,String phoneNo,String inputHtml) throws Exception {
		final String closeTag = "</iframe>";
		try {
			int frameStart;
			while ((frameStart = inputHtml.indexOf("<iframe")) != -1) {
				// Each lookup depends on the previous one; a miss anywhere makes
				// the element unusable. Passing -1 on would restart the search
				// from the top of the document and pick up unrelated text.
				int srcPos = inputHtml.indexOf("src", frameStart);
				int quoteOpen = (srcPos == -1) ? -1 : inputHtml.indexOf("\"", srcPos);
				int quoteClose = (quoteOpen == -1) ? -1 : inputHtml.indexOf("\"", quoteOpen + 1);
				int frameEnd = (quoteClose == -1) ? -1 : inputHtml.indexOf(closeTag, quoteClose + 1);
				if (frameEnd == -1) {
					// Malformed iframe: leave it in place and stop, since the
					// loop condition would otherwise find it again forever.
					break;
				}

				String frameSrc = inputHtml.substring(quoteOpen + 1, quoteClose);
				System.out.println("iframe_src:" + frameSrc);
				if (!frameSrc.startsWith("http")) {
					frameSrc = frameSrc.startsWith("/")
							? host_site + frameSrc
							: host_site + "/" + frameSrc;
				}

				String frameContent = getHtml_by_url(ua, phoneNo, frameSrc);
				String frameMarkup = inputHtml.substring(frameStart,
						frameEnd + closeTag.length());
				// Literal replacement; identical iframe markup elsewhere in the
				// document is replaced in the same step.
				inputHtml = inputHtml.replace(frameMarkup, frameContent);
			}
			return inputHtml;
		} catch (Exception ex) {
			System.out.println(ex);
			return inputHtml;
		}
	}

	/*
	 * 解决图片相对路径问题
	 */
	/**
	 * Turns relative image URLs into absolute ones.
	 *
	 * The markup is tokenised with {@code Lexer}; the {@code src} of every
	 * {@code img} tag that is non-empty and does not start with {@code http} is
	 * collected once. Each collected value is then replaced throughout the
	 * markup by {@code "http://" + host_site} followed by a path derived from
	 * its prefix:
	 * <ul>
	 * <li>{@code /..}, {@code /.}, {@code ..} or {@code .}: the prefix is
	 * dropped and the remainder is appended after a {@code /} (a remainder that
	 * itself begins with {@code /} therefore yields a double slash);</li>
	 * <li>{@code /}: the value is appended as is;</li>
	 * <li>anything else: the value is appended after {@code img_path}.</li>
	 * </ul>
	 *
	 * Replacement is literal text substitution over the whole document, so any
	 * other occurrence of the same text (in another attribute or in body text)
	 * is rewritten as well.
	 *
	 * @param inputHtml markup to process
	 * @return the rewritten markup; if any exception occurs its stack trace is
	 *         printed and the markup in its current state is returned
	 */
	public static String dealImg(String inputHtml) {
		try {
			// Pass 1: gather the distinct relative src values, in document order.
			Lexer lexer = new Lexer(inputHtml);
			Vector<String> relativeSrcs = new Vector<String>();
			Node node;
			while ((node = lexer.nextNode()) != null) {
				if (!(node instanceof TagNode)) {
					continue;
				}
				TagNode tag = (TagNode) node;
				if (!tag.getTagName().equalsIgnoreCase("img")) {
					continue;
				}
				String imgSrc = tag.getAttribute("src");
				// An empty src must be skipped: replacing "" would insert the
				// prefix between every character of the document.
				if (imgSrc == null || imgSrc.length() == 0
						|| imgSrc.startsWith("http")) {
					continue;
				}
				if (!relativeSrcs.contains(imgSrc)) {
					relativeSrcs.add(imgSrc);
				}
			}

			// Pass 2: substitute each relative value with its absolute form.
			String base = "http://" + host_site;
			for (int i = 0; i < relativeSrcs.size(); i++) {
				String src = relativeSrcs.get(i);
				String absolute;
				// Order matters: the longer prefixes must be tested first.
				if (src.startsWith("/..")) {
					absolute = base + "/" + src.substring(3);
				} else if (src.startsWith("/.")) {
					absolute = base + "/" + src.substring(2);
				} else if (src.startsWith("/")) {
					absolute = base + src;
				} else if (src.startsWith("..")) {
					absolute = base + "/" + src.substring(2);
				} else if (src.startsWith(".")) {
					absolute = base + "/" + src.substring(1);
				} else {
					absolute = base + img_path + src;
				}
				// Literal replace: image paths contain regex metacharacters
				// ('.', '?', '+', ...) and must not be interpreted as patterns,
				// nor may '$' or '\' in the new URL be read as group references.
				inputHtml = inputHtml.replace(src, absolute);
			}
			return inputHtml;
		} catch (Exception e) {
			e.printStackTrace();
			return inputHtml;
		}
	}
	/*
	 * 删除所有table标记
	 */
	/**
	 * Strips {@code table}, {@code td} and {@code tr} tags from the markup while
	 * keeping the text between them.
	 *
	 * Opening tags are matched with optional whitespace after {@code <} and any
	 * attributes up to the next {@code >}; closing tags are matched in their
	 * plain form ({@code </td>}, without whitespace). Tag names are matched
	 * case-insensitively and must end at a word boundary, so a tag whose name
	 * merely starts with one of these names (for example {@code <track>}) is
	 * left alone. {@code div} tags are not removed.
	 *
	 * @param inputHtml markup to process
	 * @return the markup without table, row and cell tags
	 * @throws Exception declared for callers; not thrown by this implementation
	 *         other than a {@code NullPointerException} for a null argument
	 */
	public static String rmTable(String inputHtml) throws Exception {
		final String ws = "[ \\f\\n\\r\\t]*";
		final String[] tagNames = { "table", "td", "tr" };
		// Opening tags first, then closing tags, one tag name at a time.
		for (String tagName : tagNames) {
			inputHtml = inputHtml.replaceAll(
					"(?i)<" + ws + tagName + "\\b[^>]*>", "");
		}
		for (String tagName : tagNames) {
			inputHtml = inputHtml.replaceAll("(?i)</" + tagName + ">", "");
		}
		return inputHtml;
	}
	/*
	 * 读取proxy配置文件信息
	 */
	/**
	 * Reads {@code config.txt} from the directory returned by
	 * {@code chtml_trans.class.getResource("/")} and returns its content as one
	 * string.
	 *
	 * The file is decoded as GB2312. Lines are concatenated without their line
	 * terminators. The underlying file stream is always closed, including when
	 * reading fails.
	 *
	 * NOTE(review): {@code URL.getPath()} is used as a file system path as is;
	 * a location containing characters that are escaped in URLs (such as
	 * spaces) may not resolve - confirm against the deployment layout.
	 *
	 * @return the content of the configuration file with line breaks removed
	 * @throws IOException if the file cannot be opened or read
	 */
	public static String getConfigFile() throws IOException {
		String filepath = chtml_trans.class.getResource("/").getPath();
		File file = new File(filepath + "config.txt");
		StringBuffer strbuf = new StringBuffer();
		FileInputStream in = new FileInputStream(file);
		try {
			BufferedReader reader = new BufferedReader(
					new InputStreamReader(in, "GB2312"));
			String line;
			while ((line = reader.readLine()) != null) {
				strbuf.append(line);
			}
		} finally {
			// Closing the file stream releases the handle even if the reader
			// could not be created or a read failed part-way.
			in.close();
		}
		return strbuf.toString();
	}

	/*
	 * 读取配置文件中的属性值
	 */
	/**
	 * Returns the text enclosed by the first {@code <varName>} tag and the first
	 * {@code </varName>} tag that follows it.
	 *
	 * @param xmlString text to search, for example the string produced by
	 *        {@code getConfigFile}
	 * @param varName tag name without angle brackets
	 * @return the enclosed text, or an empty string when the opening tag is
	 *         missing or no closing tag follows it
	 */
	public static String getValue(String xmlString, String varName) {
		String openTag = "<" + varName + ">";
		String closeTag = "</" + varName + ">";

		// Test the raw index before adding the tag length; once the length has
		// been added the value can no longer be -1.
		int openPos = xmlString.indexOf(openTag);
		if (openPos == -1) {
			return "";
		}
		int begin = openPos + openTag.length();

		// Search from 'begin' so that a stray closing tag earlier in the text
		// cannot produce an end index in front of the start index.
		int end = xmlString.indexOf(closeTag, begin);
		if (end == -1) {
			return "";
		}
		return xmlString.substring(begin, end);
	}
	
	/**
	 * Handles a POST request by passing the same request and response objects
	 * to {@code doGet}, so both request methods are processed by the same code.
	 *
	 * @param request the incoming request, forwarded unchanged
	 * @param response the outgoing response, forwarded unchanged
	 * @throws ServletException declared on this method; {@code doGet} is defined
	 *         outside this section - presumably it is the source, confirm there
	 * @throws IOException declared on this method; see the note above
	 */
	public void doPost(HttpServletRequest request, HttpServletResponse response)
	throws ServletException, IOException {
		doGet(request,response);
	}
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -