// File: mytest.java
// (code-viewer residue: font-size control label)
package wxx;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Small utility that downloads a web page and extracts text from its markup.
 *
 * <p>The class declares no instance fields; every method works only on its
 * arguments.
 */
public class MyTest {

    /**
     * Matches a whole {@code <title>} element. Case-insensitive so that
     * {@code <TITLE>} is found, DOTALL so that a title spanning several lines
     * is found, and tolerant of attributes on the opening tag.
     */
    private static final Pattern TITLE_PATTERN = Pattern.compile(
            "<title\\b[^>]*>.*?</title\\s*>",
            Pattern.CASE_INSENSITIVE | Pattern.DOTALL);

    /** Matches any {@code <...>} tag, including one that contains line breaks. */
    private static final Pattern TAG_PATTERN = Pattern.compile("<.*?>", Pattern.DOTALL);

    /** Page fetched by {@link #main(String[])} when no URL is given on the command line. */
    private static final String DEFAULT_URL = "http://youxi.zol.com.cn/pc/index4869.html";

    /**
     * Reads the whole content behind a URL and returns it as one string.
     *
     * <p>The content is read line by line and the lines are appended without
     * any separator, so the result contains no line terminators. Bytes are
     * decoded with the platform default charset.
     *
     * @param htmlurl the URL to read, in textual form
     * @return the concatenated lines of the resource
     * @throws MalformedURLException if {@code htmlurl} cannot be parsed as a URL;
     *         a hint is printed to standard output before the exception is rethrown
     * @throws IOException if opening or reading the stream fails
     */
    public String getOneHtml(final String htmlurl) throws IOException {
        final StringBuilder page = new StringBuilder();
        try {
            final URL url = new URL(htmlurl);
            final BufferedReader in =
                    new BufferedReader(new InputStreamReader(url.openStream()));
            try {
                String line;
                while ((line = in.readLine()) != null) {
                    page.append(line);
                }
            } finally {
                // Close even when reading fails, so the connection is not leaked.
                in.close();
            }
        } catch (final MalformedURLException me) {
            System.out.println("你输入的URL格式有问题!请仔细输入");
            throw me;
        }
        // Any other IOException propagates unchanged; the caller decides how to report it.
        return page.toString();
    }

    /**
     * Extracts the page title from HTML source.
     *
     * <p>If several {@code <title>} elements are present, their texts are
     * concatenated in document order. Tags nested inside a title are removed.
     *
     * @param s HTML source; may be {@code null}
     * @return the title text, or an empty string when {@code s} is {@code null}
     *         or contains no {@code <title>} element
     */
    public String getTitle(final String s) {
        if (s == null) {
            return "";
        }
        final StringBuilder titles = new StringBuilder();
        final Matcher ma = TITLE_PATTERN.matcher(s);
        while (ma.find()) {
            titles.append(ma.group());
        }
        return outTag(titles.toString());
    }

    /**
     * Removes every {@code <...>} tag from the given text.
     *
     * @param s text that may contain markup; may be {@code null}
     * @return {@code s} with all tags removed, or an empty string when
     *         {@code s} is {@code null}
     */
    public String outTag(final String s) {
        if (s == null) {
            return "";
        }
        return TAG_PATTERN.matcher(s).replaceAll("");
    }

    /**
     * Prints a greeting and then the content of a web page.
     *
     * @param args optional; {@code args[0]} is the URL to fetch, otherwise a
     *        built-in default URL is used
     * @throws Exception if the page cannot be fetched
     */
    public static void main(String[] args) throws Exception {
        System.out.println("hello");
        final String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        final MyTest mt = new MyTest();
        System.out.println(mt.getOneHtml(url));
    }
}
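/*
 * Keyboard shortcuts (help text from the code viewer; not part of the program):
 *   Copy code          Ctrl + C
 *   Search code        Ctrl + F
 *   Full-screen mode   F11
 *   Toggle theme       Ctrl + Shift + D
 *   Show shortcuts     ?
 *   Increase font size Ctrl + =
 *   Decrease font size Ctrl + -
 */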