⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 mytest.java

📁 解析html网页
💻 JAVA
字号:
package wxx;


import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
  
/**
 * Small helper for downloading an HTML page and extracting its title.
 *
 * <p>The class holds no instance state; the compiled regular expressions are
 * shared as immutable constants.
 */
public class MyTest {

    /** Page fetched by {@link #main(String[])} when no URL argument is supplied. */
    private static final String DEFAULT_URL = "http://youxi.zol.com.cn/pc/index4869.html";

    /**
     * Matches a complete {@code <title>} element. Case-insensitive so that
     * {@code <TITLE>} is found, tolerant of attributes on the opening tag, and
     * DOTALL so that a title spanning several lines is matched as well.
     */
    private static final Pattern TITLE_PATTERN =
            Pattern.compile("<title\\b[^>]*>.*?</title\\s*>", Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /** Matches one markup tag: the shortest run from {@code <} to the next {@code >} on the same line. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<.*?>");

    /** Extracts the value of the {@code charset} parameter from a Content-Type header value. */
    private static final Pattern CHARSET_PARAM =
            Pattern.compile("charset\\s*=\\s*\"?([^\\s;\"]+)", Pattern.CASE_INSENSITIVE);

    /**
     * Downloads the resource at the given URL and returns its text.
     *
     * <p>The text is read line by line and the lines are concatenated
     * <em>without</em> line separators, so the result is a single line. Bytes are
     * decoded with the charset announced in the response's Content-Type header,
     * falling back to the platform default charset when none is announced or the
     * announced name is not supported.
     *
     * @param htmlurl absolute URL of the page to read
     * @return the page content with all line terminators removed
     * @throws MalformedURLException if {@code htmlurl} is not a valid URL; a hint
     *         is printed to standard output before the exception is rethrown
     * @throws IOException if the connection cannot be opened or reading fails
     */
    public String getOneHtml(final String htmlurl) throws IOException {
        final StringBuilder content = new StringBuilder();
        try {
            final URLConnection connection = new URL(htmlurl).openConnection();
            final Charset charset = charsetFromContentType(connection.getContentType());
            // try-with-resources closes the stream even when readLine() fails midway.
            try (BufferedReader in =
                    new BufferedReader(new InputStreamReader(connection.getInputStream(), charset))) {
                String line;
                while ((line = in.readLine()) != null) {
                    content.append(line);
                }
            }
        } catch (final MalformedURLException me) {
            System.out.println("你输入的URL格式有问题!请仔细输入");
            throw me;
        }
        return content.toString();
    }

    /**
     * Resolves the charset named in a Content-Type header value.
     *
     * @param contentType header value such as {@code text/html; charset=GBK}; may be {@code null}
     * @return the named charset, or the platform default when the header is
     *         missing, carries no charset parameter, or names an unsupported charset
     */
    private static Charset charsetFromContentType(final String contentType) {
        if (contentType != null) {
            final Matcher m = CHARSET_PARAM.matcher(contentType);
            if (m.find()) {
                try {
                    return Charset.forName(m.group(1));
                } catch (final IllegalArgumentException ignored) {
                    // Illegal or unsupported charset name sent by the server:
                    // fall through to the platform default below.
                }
            }
        }
        return Charset.defaultCharset();
    }

    /**
     * Extracts the page title from HTML text.
     *
     * <p>Every {@code <title>} element found is collected; their contents are
     * concatenated in document order and all markup is removed via
     * {@link #outTag(String)}. Surrounding whitespace inside the element is kept.
     *
     * @param s HTML text to search
     * @return the title text, or an empty string when no title element is present
     */
    public String getTitle(final String s) {
        final StringBuilder titles = new StringBuilder();
        final Matcher ma = TITLE_PATTERN.matcher(s);
        while (ma.find()) {
            titles.append(ma.group());
        }
        return outTag(titles.toString());
    }

    /**
     * Removes markup tags from a string.
     *
     * @param s text possibly containing {@code <...>} tags
     * @return {@code s} with every {@code <...>} span deleted
     */
    public String outTag(final String s) {
        return TAG_PATTERN.matcher(s).replaceAll("");
    }

    /**
     * Prints a greeting followed by the content of a web page.
     *
     * @param args optional; {@code args[0]} is the URL to fetch, otherwise a
     *        built-in default URL is used
     * @throws Exception if the page cannot be downloaded
     */
    public static void main(String[] args) throws Exception {
        System.out.println("hello");
        final String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        final MyTest mt = new MyTest();
        System.out.println(mt.getOneHtml(url));
    }

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -