// File: webparser.java
// (Residue from the web code viewer this file was copied from: "Font size:" control.)
package chapter2;
import java.io.*;
import java.net.*;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
/**
 * Extracts the plain text from a saved HTML file.
 *
 * <p>The source file is read in full, everything between {@code <} and {@code >} is
 * discarded, layout whitespace (space, tab, CR, LF) and the {@code &nbsp;} entity are
 * dropped, and the remaining characters are echoed to standard output and written to
 * the destination file. Other entities (for example {@code &amp;}) are passed through
 * unchanged.
 */
public class WebParser {
    /** HTML file that is read. */
    private static final String SRC_FILE_PATH = "D:\\workshop\\ch2\\htmlsrc.html";

    /** Text file that receives the extracted content. */
    private static final String DST_FILE_PATH = "D:\\workshop\\ch2\\puresrc.txt";

    /** The only character entity that is removed from the text. */
    private static final String NBSP_ENTITY = "&nbsp;";

    /** Number of characters requested from the reader per {@code read} call. */
    private static final int READ_CHUNK_CHARS = 8096 * 2;

    /**
     * Runs {@link #Parser()} and terminates the JVM with status 1 if it fails with an
     * {@link IOException}.
     *
     * @param args ignored
     * @throws IOException declared for compatibility; I/O failures are reported on
     *     standard error instead of being propagated
     */
    public static void main(String[] args) throws IOException {
        try {
            Parser();
        } catch (IOException e) {
            System.err.println("下载失败,请检查输入地址是否正确。");
            System.exit(1);
        }
    }

    /**
     * Reads the source HTML file, strips its markup, prints the resulting text to
     * standard output and writes it to the destination file.
     *
     * @throws IOException if the source cannot be read or the destination cannot be
     *     written
     */
    public static void Parser() throws IOException {
        // The source is read completely before the destination is opened, so a read
        // failure never truncates an existing output file.
        String html = readAll(new File(SRC_FILE_PATH));
        String text = stripMarkup(html);

        // try-with-resources closes the writer even when write() throws.
        try (FileWriter writer = new FileWriter(new File(DST_FILE_PATH))) {
            System.out.println(text);
            writer.write(text);
        }
    }

    /** Returns the entire character content of {@code file}. */
    private static String readAll(File file) throws IOException {
        StringBuilder content = new StringBuilder(READ_CHUNK_CHARS);
        char[] chunk = new char[READ_CHUNK_CHARS];
        try (FileReader reader = new FileReader(file)) {
            int read;
            // A single read() may return fewer characters than the file holds, so keep
            // reading until end of stream (-1).
            while ((read = reader.read(chunk)) != -1) {
                content.append(chunk, 0, read);
            }
        }
        return content.toString();
    }

    /** Returns {@code html} without tags, layout whitespace and {@code &nbsp;} entities. */
    private static String stripMarkup(String html) {
        StringBuilder text = new StringBuilder(html.length());
        boolean insideTag = false;
        int i = 0;
        while (i < html.length()) {
            char c = html.charAt(i);
            if (insideTag) {
                // Everything up to and including the closing '>' is markup.
                if (c == '>') {
                    insideTag = false;
                }
                i++;
            } else if (c == '<') {
                insideTag = true;
                i++;
            } else if (c == '\n' || c == '\r' || c == '\t' || c == ' ') {
                i++;
            } else if (html.startsWith(NBSP_ENTITY, i)) {
                // startsWith is bounds-safe, unlike indexing i+1..i+5 directly.
                i += NBSP_ENTITY.length();
            } else {
                text.append(c);
                i++;
            }
        }
        return text.toString();
    }
}
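/*
 * Keyboard shortcuts (residue from the web code viewer; not part of the program):
 *   Copy code            Ctrl + C
 *   Search code          Ctrl + F
 *   Full-screen mode     F11
 *   Toggle theme         Ctrl + Shift + D
 *   Show shortcuts       ?
 *   Increase font size   Ctrl + =
 *   Decrease font size   Ctrl + -
 */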