// readhtml.java  (code-viewer header residue; the viewer's "Font size:" label followed here)
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.net.URL;
import java.net.URLConnection;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import com.sun.org.apache.xerces.internal.dom.DocumentImpl;
/**
 * Small command-line utility that downloads a fixed news article, then parses
 * the local file {@code index.xml} and prints the text of its first
 * {@code <title>} element followed by the text of all {@code <p>} elements.
 *
 * <p>Also exposes {@link #htmlToStr(String)}, a simple tag stripper.
 */
public class ReadHtml
{
    /** Address of the article downloaded by {@link #main(String[])}. */
    private static final String ARTICLE_URL =
            "http://news.hit.edu.cn/articles/2008/11-27/11105934.htm";

    /** Path (relative to the working directory) of the XML document that is parsed. */
    private static final String XML_FILE = "index.xml";

    public ReadHtml()
    {
    }

    /**
     * Downloads {@link #ARTICLE_URL}, parses {@link #XML_FILE} and prints the
     * title and paragraph text to standard output. Any failure is reported by
     * printing the stack trace.
     *
     * @param args ignored
     */
    public static void main(String[] args)
    {
        ReadHtml rh = new ReadHtml();
        try
        {
            // NOTE(review): the downloaded page is not used by the code below;
            // the call is kept so that a failed download still aborts the run
            // exactly as before. Confirm whether the page was meant to be
            // written to index.xml first.
            rh.getDocumentAt(ARTICLE_URL);

            Document document = parseXmlFile(XML_FILE);

            System.out.print("标题: ");
            System.out.println(firstTitle(document));
            System.out.println("正文: ");
            System.out.println(paragraphText(document));
        }
        catch (Exception e)
        {
            e.printStackTrace();
        }
    }

    /**
     * Parses the XML file at the given path into a DOM document.
     *
     * <p>The file is decoded with the platform default charset (as the
     * original code did), and the reader is always closed, even when parsing
     * fails.
     *
     * @param path path of the XML file to read
     * @return the parsed document
     * @throws Exception if the file cannot be read or is not well-formed XML
     */
    private static Document parseXmlFile(String path) throws Exception
    {
        DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
        DocumentBuilder builder = factory.newDocumentBuilder();
        BufferedReader reader =
                new BufferedReader(new InputStreamReader(new FileInputStream(path)));
        try
        {
            return builder.parse(new InputSource(reader));
        }
        finally
        {
            reader.close();
        }
    }

    /**
     * Returns the value of the first child node of the first {@code <title>}
     * element.
     *
     * @param document parsed document to search
     * @return the title text, or an empty string when there is no
     *         {@code <title>} element, it has no children, or its first child
     *         carries no node value
     */
    private static String firstTitle(Document document)
    {
        NodeList titles = document.getElementsByTagName("title");
        if (titles.getLength() == 0)
        {
            return "";
        }
        Node firstChild = titles.item(0).getFirstChild();
        if (firstChild == null || firstChild.getNodeValue() == null)
        {
            return "";
        }
        return firstChild.getNodeValue();
    }

    /**
     * Concatenates the node values of the direct children of every
     * {@code <p>} element, in document order.
     *
     * <p>Children whose node value is {@code null} (for example nested
     * elements) contribute nothing; their own descendants are not visited
     * through the parent paragraph.
     *
     * @param document parsed document to search
     * @return the concatenated text, possibly empty
     */
    private static String paragraphText(Document document)
    {
        StringBuilder text = new StringBuilder();
        NodeList paragraphs = document.getElementsByTagName("p");
        for (int i = 0; i < paragraphs.getLength(); i++)
        {
            NodeList children = paragraphs.item(i).getChildNodes();
            for (int j = 0; j < children.getLength(); j++)
            {
                String value = children.item(j).getNodeValue();
                if (value != null)
                {
                    text.append(value);
                }
            }
        }
        return text.toString();
    }

    /**
     * Downloads the resource at the given URL and returns it as text, with
     * every line terminated by {@code '\n'}.
     *
     * <p>The bytes are decoded with the platform default charset.
     * NOTE(review): the page's declared encoding is not consulted; confirm
     * this is acceptable for the target site.
     *
     * @param urlString address of the resource to download
     * @return the downloaded content
     * @throws Exception if the URL is malformed or the download fails
     */
    private String getDocumentAt(String urlString) throws Exception
    {
        // Build the URL and open a connection to it.
        URL url = new URL(urlString);
        URLConnection connection = url.openConnection();

        // Read the page line by line.
        StringBuilder content = new StringBuilder();
        BufferedReader reader = new BufferedReader(new InputStreamReader(
                connection.getInputStream()));
        try
        {
            String line;
            while ((line = reader.readLine()) != null)
            {
                content.append(line).append('\n');
            }
        }
        finally
        {
            // Release the connection's stream even when reading fails.
            reader.close();
        }
        return content.toString();
    }

    /**
     * Removes everything between {@code '<'} and the next {@code '>'}
     * (inclusive) from the given text.
     *
     * <p>A {@code '>'} that does not close a tag is ordinary text and is kept.
     * An unterminated tag swallows the rest of the input. Character entities
     * such as {@code &amp;} are not decoded.
     *
     * @param htmlStr markup to strip; may be {@code null}
     * @return the text outside of tags, or {@code null} when {@code htmlStr}
     *         is {@code null}
     */
    public String htmlToStr(String htmlStr)
    {
        if (htmlStr == null)
        {
            return null;
        }
        StringBuilder text = new StringBuilder(htmlStr.length());
        boolean insideTag = false;
        for (int i = 0; i < htmlStr.length(); i++)
        {
            char c = htmlStr.charAt(i);
            if (c == '<')
            {
                insideTag = true;
            }
            else if (c == '>' && insideTag)
            {
                insideTag = false;
            }
            else if (!insideTag)
            {
                text.append(c);
            }
        }
        return text.toString();
    }
}
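/*
 * Code-viewer residue (not part of the program): keyboard shortcuts
 *   Copy code          Ctrl + C
 *   Search code        Ctrl + F
 *   Full-screen mode   F11
 *   Toggle theme       Ctrl + Shift + D
 *   Show shortcuts     ?
 *   Increase font size Ctrl + =
 *   Decrease font size Ctrl + -
 */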