📄 parser.java
字号:
//package firstproject;
import java.io.*;
import java.net.*;
/**
 * Fetches mobile (WML) search result pages for a keyword and extracts the link
 * target and link text of each result anchor found in the returned markup.
 *
 * <p>Results are stored in {@link #urllist} and {@link #urlnamelist}; the entry
 * for the n-th anchor (0-based) found on page p is kept in slot {@code p * 5 + n}.
 * Slots for which no anchor was found stay {@code null}.
 */
public class Parser {
    /** Maximum number of anchors taken from one result page; also the page stride in the arrays. */
    private static final int RESULTS_PER_PAGE = 5;

    /** Smallest size of the result arrays (the size the arrays historically always had). */
    private static final int MIN_CAPACITY = 100;

    /**
     * Number of characters skipped between the end of {@link #str_head} and the start of the URL.
     * NOTE(review): presumably the length of {@code "N" href="} in the served markup - confirm.
     */
    private static final int HREF_PREFIX_LENGTH = 10;

    String[] urllist; // saving the parser's result: extracted link targets
    String[] urlnamelist; // extracted link texts, parallel to urllist
    String str; // line of the response currently being scanned
    String str_encode; // URL-encoded form of keywordlist, set by parse()
    String keywordlist; // keyword(s) to search for
    String str_head; // marker that starts a result anchor
    String str_tail; // marker that ends the anchor's opening tag (and the URL)
    String str_end; // marker that closes the anchor (ends the link text)
    int count; // anchors extracted so far from the page being processed
    int downcount; // the number of urls which are requested

    /**
     * Creates a parser with the default keyword {@code "ITNLP"}.
     *
     * <p>The result arrays are sized so that every page {@link #parse()} will request
     * ({@code a / 5 + 1} pages of up to 5 results) has room, and never smaller than 100.
     *
     * @param a the number of urls which are requested
     */
    Parser(int a) {
        // parse() walks a / 5 + 1 pages and may write 5 slots per page, so a fixed
        // 100-slot array overflows once a >= 100. Negative or overflowing products
        // fall back to the minimum; parse() additionally bounds-checks every store.
        int capacity = Math.max(MIN_CAPACITY, RESULTS_PER_PAGE * (a / RESULTS_PER_PAGE + 1));
        urllist = new String[capacity];
        urlnamelist = new String[capacity];
        str = "";
        str_encode = "";
        keywordlist = "ITNLP";
        str_head = "<a accesskey=";
        str_tail = "\">";
        str_end = "</a>";
        count = 0;
        downcount = a;
    }

    /**
     * Returns the stored URL in slot {@code i} followed by a newline.
     *
     * @param i slot index into {@link #urllist}
     * @return the slot's content plus {@code '\n'}; {@code "null\n"} if the slot was never filled
     * @throws ArrayIndexOutOfBoundsException if {@code i} is outside the result array
     */
    public String printurl(int i) {
        return this.urllist[i] + '\n';
    }

    /**
     * Replaces the keyword(s) used by the next call to {@link #parse()}.
     *
     * @param strlist the raw (not yet URL-encoded) keyword string
     */
    public void setkeyword(String strlist) {
        keywordlist = strlist;
    }

    /**
     * URL-encodes the current keyword and scans {@code downcount / 5 + 1} result pages,
     * storing up to 5 anchors per page into {@link #urllist} / {@link #urlnamelist}.
     *
     * <p>A page that cannot be fetched or read is reported on {@code System.err} and
     * skipped; the remaining pages are still processed.
     *
     * @throws RuntimeException if the VM does not support UTF-8 (cause attached)
     */
    public void parse() {
        try {
            str_encode = URLEncoder.encode(keywordlist, "UTF-8");
        } catch (UnsupportedEncodingException ex) {
            throw new RuntimeException("Broken VM does not support UTF-8", ex);
        }
        int pages = downcount / RESULTS_PER_PAGE + 1;
        for (int page = 0; page < pages; page++) {
            count = 0;
            fetchPage(page);
        }
    }

    /** Downloads one result page and feeds each line to {@link #extractLinks(String, int)}. */
    private void fetchPage(int page) {
        String address = "http://www.google.cn/m/search?mrestrict=wml&site=mobile&q=" + str_encode
                + "&start=" + String.valueOf(page * RESULTS_PER_PAGE) + "&sa=N";
        try {
            URL url = new URL(address);
            // NOTE(review): the response is decoded with the platform default charset;
            // confirm the server's encoding before pinning one here.
            BufferedReader in = new BufferedReader(new InputStreamReader(url.openStream()));
            try {
                // Read the text returned by the server until the page quota is reached.
                while ((str = in.readLine()) != null && count < RESULTS_PER_PAGE) {
                    System.out.println(str);
                    extractLinks(str, page);
                }
            } finally {
                // Always release the connection, even when reading or parsing fails.
                in.close();
            }
        } catch (IOException ex) {
            // Best effort: report the failure (MalformedURLException included) and
            // let the caller continue with the next page.
            System.err.println("Parser: could not read " + address + ": " + ex);
        }
    }

    /** Extracts every result anchor on one line into the slots of the given page. */
    private void extractLinks(String line, int page) {
        int tailPos = 0;
        while (tailPos < line.length() && count < RESULTS_PER_PAGE) {
            tailPos = line.indexOf(str_tail, tailPos);
            if (tailPos < 0) {
                return; // no further opening tag ends on this line
            }
            // The anchor start is the nearest head marker at or before the tail marker.
            int headPos = line.lastIndexOf(str_head, tailPos);
            if (headPos < 0) {
                tailPos++; // this "\">" belongs to some other element; keep looking
                continue;
            }
            int endPos = line.indexOf(str_end, tailPos);
            if (endPos < 0) {
                return; // anchor is not closed on this line
            }
            int slot = count + RESULTS_PER_PAGE * page;
            if (slot >= urllist.length || slot >= urlnamelist.length) {
                return; // result arrays are full
            }
            int urlStart = headPos + str_head.length() + HREF_PREFIX_LENGTH;
            int nameStart = tailPos + str_tail.length();
            // Skip anchors too short to contain the expected prefix instead of
            // failing with StringIndexOutOfBoundsException.
            if (urlStart <= tailPos && nameStart <= endPos) {
                urllist[slot] = line.substring(urlStart, tailPos);
                urlnamelist[slot] = line.substring(nameStart, endPos);
                count++;
            }
            tailPos++;
        }
    }
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -