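/*
 * nodefilters.java
 *
 * From the project "A program dedicated to automatically collecting
 * cited-reference data from CMI and CNKI web pages" · Java code · 87 lines in total
 *
 * (Code-viewer page labels captured with the source: "JAVA", "87", "Font size")
 */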
package cn.ac.cintcm.spider.cnki;

import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.beans.FilterBean;
import org.htmlparser.filters.AndFilter;
import org.htmlparser.filters.HasAttributeFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;

/**
 * Static convenience methods for building HTML Parser {@link NodeFilter}s and
 * for running them against a resource through a {@link FilterBean}.
 *
 * <p>All methods are static; the class holds no state.
 *
 * <p>Error handling is best-effort: if a {@link Parser} cannot be created for
 * the given resource, the failure is reported on standard error and the bean is
 * still configured, with a {@code null} parser. No exception is propagated to
 * the caller.
 * NOTE(review): what {@link FilterBean#getNodes()} yields for a bean with a
 * {@code null} parser depends on the HTML Parser version in use (presumably
 * {@code null}) -- callers should null-check the result; confirm.
 */
public class NodeFilters {

	/**
	 * Builds a one-element filter array that matches tags by name.
	 *
	 * @param tagName the tag name handed to {@link TagNameFilter#setName(String)}
	 * @return an array containing a single {@link TagNameFilter}
	 */
	public static NodeFilter[] getFilter(String tagName) {
		return new NodeFilter[] { getSingleFilter(tagName) };
	}

	/**
	 * Builds a one-element filter array that matches tags by name and by an
	 * attribute name/value pair.
	 *
	 * @param tagName    the tag name for the {@link TagNameFilter}
	 * @param attriName  the attribute name for the {@link HasAttributeFilter}
	 * @param attriValue the attribute value for the {@link HasAttributeFilter}
	 * @return an array containing a single {@link AndFilter} whose predicates
	 *         are the tag-name filter followed by the attribute filter
	 */
	public static NodeFilter[] getFilter(String tagName, String attriName, String attriValue) {
		NodeFilter byTag = getSingleFilter(tagName);

		HasAttributeFilter byAttribute = new HasAttributeFilter();
		byAttribute.setAttributeName(attriName);
		byAttribute.setAttributeValue(attriValue);

		AndFilter tagAndAttribute = new AndFilter();
		tagAndAttribute.setPredicates(new NodeFilter[] { byTag, byAttribute });
		return new NodeFilter[] { tagAndAttribute };
	}

	/**
	 * Collects the nodes of {@code resource} that match the given tag name.
	 *
	 * @param resource   the string passed to {@code new Parser(String)}
	 *                   (a URL or file name per the HTML Parser documentation --
	 *                   confirm for the library version in use)
	 * @param filterName the tag name to match
	 * @return the result of {@link FilterBean#getNodes()} for the configured bean
	 */
	public static NodeList getNodeList(String resource, String filterName) {
		// Arguments are evaluated left to right, so the parser is still created
		// before the filters are built, as in the original inline code.
		return createBean(createParser(resource), getFilter(filterName)).getNodes();
	}

	/**
	 * Collects the nodes of {@code resource} that match the given tag name and
	 * attribute name/value pair.
	 *
	 * @param resource   the string passed to {@code new Parser(String)}
	 * @param filterName the tag name to match
	 * @param attriName  the attribute name to match
	 * @param attriValue the attribute value to match
	 * @return the result of {@link FilterBean#getNodes()} for the configured bean
	 */
	public static NodeList getNodeList(String resource, String filterName, String attriName, String attriValue) {
		return getFilterBeans(resource, filterName, attriName, attriValue).getNodes();
	}

	/**
	 * Creates a {@link FilterBean} configured with a tag-name-and-attribute
	 * filter and a parser for {@code resource}, without asking it for nodes.
	 *
	 * @param resource   the string passed to {@code new Parser(String)}
	 * @param filterName the tag name to match
	 * @param attriName  the attribute name to match
	 * @param attriValue the attribute value to match
	 * @return the configured bean; its parser is {@code null} if parser
	 *         creation failed
	 */
	public static FilterBean getFilterBeans(String resource, String filterName, String attriName, String attriValue) {
		return createBean(createParser(resource), getFilter(filterName, attriName, attriValue));
	}

	/**
	 * Builds a single filter that matches tags by name.
	 *
	 * @param tagName the tag name handed to {@link TagNameFilter#setName(String)}
	 * @return a new {@link TagNameFilter}
	 */
	public static NodeFilter getSingleFilter(String tagName) {
		TagNameFilter byTag = new TagNameFilter();
		byTag.setName(tagName);
		return byTag;
	}

	/**
	 * Creates a parser for {@code resource}, reporting (not propagating) failure.
	 *
	 * @param resource the string passed to {@code new Parser(String)}
	 * @return the parser, or {@code null} if a {@link ParserException} was thrown
	 */
	private static Parser createParser(String resource) {
		try {
			return new Parser(resource);
		} catch (ParserException e) {
			// Best-effort by design of the public API (no checked exception is
			// declared): report which resource failed, then carry on without a parser.
			System.err.println("NodeFilters: could not create parser for resource: " + resource);
			e.printStackTrace();
			return null;
		}
	}

	/**
	 * Wires filters and parser into a new bean.
	 *
	 * @param parser  the parser to attach; may be {@code null}
	 * @param filters the filters to attach
	 * @return the configured bean
	 */
	private static FilterBean createBean(Parser parser, NodeFilter[] filters) {
		FilterBean bean = new FilterBean();
		// Filters are set before the parser, the same order every original method used.
		bean.setFilters(filters);
		bean.setParser(parser);
		return bean;
	}

}

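/*
 * Keyboard shortcuts (code-viewer page help captured with the source):
 *
 *   Copy code:           Ctrl + C
 *   Search code:         Ctrl + F
 *   Full-screen mode:    F11
 *   Increase font size:  Ctrl + =
 *   Decrease font size:  Ctrl + -
 *   Show shortcuts:      ?
 */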