getcmiqikan.java
来自「本程序是专门用于从网页上自动收集cmi,cnki上的被引文献的数据」· Java 代码 · 共 42 行
JAVA
42 行
package cn.ac.cintcm.spider.cmi;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.beans.FilterBean;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.tags.InputTag;
import org.htmlparser.util.NodeList;
/**
 * Command-line utility that fetches an HTML page, selects every
 * {@code <option>} element on it and prints the {@code value} attribute of
 * each one, one per line.
 *
 * <p>NOTE(review): judging by the class name ("qikan") the values are
 * presumably journal names — confirm against the target page.
 */
public class GetCmiQiKan {

    /** Page that is scraped when no URL is supplied on the command line. */
    private static final String DEFAULT_URL =
            "http://cnki.cintcm.ac.cn:1012/cdweb/page/kanmin.cbs?db=mulu";

    /**
     * Prints the {@code value} attribute of every {@code <option>} element of
     * the target page to standard output.
     *
     * @param args optional; when present, {@code args[0]} is used as the URL to
     *             scrape instead of the built-in default
     * @throws IOException kept in the signature for source compatibility with
     *                     existing callers
     */
    public static void main(String[] args) throws IOException {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        for (String name : collectOptionValues(url)) {
            System.out.println(name);
        }
    }

    /**
     * Builds the filter set used to select the nodes of interest.
     *
     * @return a one-element array holding a tag-name filter for {@code option}
     */
    public static NodeFilter[] getFilter() {
        TagNameFilter optionFilter = new TagNameFilter();
        optionFilter.setName("option");
        return new NodeFilter[] { optionFilter };
    }

    /**
     * Fetches {@code url}, applies {@link #getFilter()} and collects the
     * {@code value} attribute of each matching node, in the order the nodes
     * are returned by the parser.
     *
     * @param url address of the page to scrape
     * @return the collected attribute values; an entry is whatever
     *         {@code getAttribute("value")} yields for that node
     */
    private static List<String> collectOptionValues(String url) {
        FilterBean bean = new FilterBean();
        bean.setFilters(getFilter());
        bean.setURL(url);
        NodeList matches = bean.getNodes();

        List<String> values = new ArrayList<String>();
        for (Node node : matches.toNodeArray()) {
            // Node itself exposes no attribute accessor, so the node's HTML is
            // loaded into a scratch InputTag purely to reuse its attribute
            // lookup.
            // NOTE(review): the matched node is presumably already a parsed
            // tag; reading the attribute from it directly would avoid this
            // re-parse — confirm and simplify.
            InputTag scratch = new InputTag();
            scratch.setText(node.toHtml());
            values.add(scratch.getAttribute("value"));
        }
        return values;
    }
}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?