⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 document.java

📁 用httpclient-4.0-alpha2 打造基于http协议的网站分析器
💻 JAVA
字号:
package com.ue.browser;

import java.util.ArrayList;
import com.ue.browser.core.EventHandler;
import com.ue.browser.core.HTMLElement;
import com.ue.browser.core.Event;
import com.ue.browser.util.ParseDocument;
import com.ue.browser.util.StringUtil;


/**
 * Root element of a parsed page.
 *
 * <p>Holds the raw body text, the elements that {@link ParseDocument} extracted
 * from it, and the {@link Frame}/{@link Iframe} objects created for the
 * frame and iframe tags found among those elements.
 */
public class Document extends HTMLElement {

	/** Raw page text most recently passed to {@link #setBody(String)}. */
	private String body;

	/** Parsed elements of the current body; {@code null} until setBody/setElements is called. */
	private ArrayList<HTMLElement> elements;

	/** Frames created from the "Frame" elements seen by {@link #setBody(String)}. */
	private ArrayList<Frame> frames = new ArrayList<Frame>();

	/** Iframes created from the "Iframe" elements seen by {@link #setBody(String)}. */
	private ArrayList<Iframe> iframes = new ArrayList<Iframe>();

	/** Parser used by {@link #setBody(String)}; bound to this document's browser. */
	private ParseDocument pd = new ParseDocument(this.getBrowser());

	/**
	 * Creates an empty document owned by the given browser and tags it "Document".
	 *
	 * @param browser the browser this document belongs to
	 */
	public Document(Browser browser) {
		super(browser);
		this.setTag("Document");
	}

	/**
	 * Stores the page text, parses it, builds the script events for the parsed
	 * elements and creates a {@link Frame}/{@link Iframe} for every frame or
	 * iframe source the parser reported.
	 *
	 * <p>NOTE(review): frames and iframes are appended to the existing lists, so
	 * calling this method more than once on the same instance accumulates the
	 * entries of every body - confirm whether the lists should be reset here.
	 *
	 * @param body the page text to parse
	 */
	public void setBody(String body) {
		this.body = body;
		pd.parse(body);
		this.elements = pd.getElements();

		Javascript jso = new Javascript(elements, this.getBrowser());
		jso.buildEvents();

		if (pd.isHaveIframe()) {
			for (String src : this.ParsePageIframeSrc()) {
				iframes.add(new Iframe(src, this.getBrowser()));
			}
		}

		if (pd.isHaveFrame()) {
			for (String src : this.ParsePageFrameSrc()) {
				frames.add(new Frame(src, this.getBrowser()));
			}
		}
	}

	/** @return the page text most recently passed to {@link #setBody(String)} */
	public String getBody() {
		return body;
	}

	/** @return the parsed elements, or {@code null} if none have been set yet */
	public ArrayList<HTMLElement> getElements() {
		return elements;
	}

	/** @param elements the element list to use in place of the parsed one */
	public void setElements(ArrayList<HTMLElement> elements) {
		this.elements = elements;
	}

	/**
	 * Placeholder with an empty body. The original comment was unreadable
	 * (encoding damage); it mentioned "onLoad", so this was presumably meant to
	 * dispatch default events such as onLoad - TODO confirm and implement.
	 */
	private void sendEvent() {
		// intentionally empty
	}

	/**
	 * Collects the src attribute values of the IFRAME elements contained in the
	 * page, resolved against the page URL.
	 *
	 * @return the resolved iframe URLs in element order; never {@code null},
	 *         empty when no elements are available
	 */
	public ArrayList<String> ParsePageIframeSrc() {
		return collectSrcUrls("Iframe");
	}

	/**
	 * Collects the src attribute values of the FRAME elements contained in the
	 * page, resolved against the page URL.
	 *
	 * @return the resolved frame URLs in element order; never {@code null},
	 *         empty when no elements are available
	 */
	public ArrayList<String> ParsePageFrameSrc() {
		return collectSrcUrls("Frame");
	}

	/**
	 * Shared implementation of the two public src collectors.
	 *
	 * @param tag tag name an element must carry to be considered
	 * @return resolved src URLs of every matching element that has a src attribute
	 */
	private ArrayList<String> collectSrcUrls(String tag) {
		ArrayList<String> urls = new ArrayList<String>();
		if (elements == null) {
			// Nothing parsed yet: report "no frames" instead of failing with an NPE.
			return urls;
		}
		for (HTMLElement he : elements) {
			// Failures are contained per element so that one malformed src does
			// not discard the frames that follow it.
			try {
				if (!tag.equals(he.getTag())) {
					continue;
				}
				String src = he.getAttribute("src");
				if (src == null) {
					continue;
				}
				// URL of the page the element lives in; relative sources are resolved against it.
				String pageUrl = he.getBrowser().getDocument().getUrl();
				urls.add(resolveSrc(src, pageUrl));
			} catch (Exception e) {
				// Best effort, as before: report the problem and carry on.
				e.printStackTrace();
			}
		}
		return urls;
	}

	/**
	 * Turns a src attribute value into the URL to request. Addresses come in
	 * several forms and must be normalised before being handed to the HTTP
	 * client. Exactly one rule is applied (first match wins), so an already
	 * resolved value is never rewritten a second time.
	 *
	 * @param src     raw src attribute value, not {@code null}
	 * @param pageUrl URL of the containing page
	 * @return the resolved URL, or {@code src} unchanged when no rule applies
	 */
	private static String resolveSrc(String src, String pageUrl) {
		// Protocol-relative ("//host/path"): borrow the page's protocol.
		if (src.startsWith("//")) {
			return StringUtil.getProtocol(pageUrl) + ":" + src;
		}
		// Absolute path: prefix with what StringUtil.getDomain reports for the page.
		if (StringUtil.isSlashStart(src)) {
			return StringUtil.getDomain(pageUrl) + src;
		}
		// Relative path below the page's directory (anything that is not already http/https).
		if (StringUtil.isLetterOrNumberStart(src) && !hasHttpScheme(src)) {
			return StringUtil.getLastPath(pageUrl) + "/" + src;
		}
		// Relative path climbing to a parent directory ("../").
		if (src.startsWith("..")) {
			int levels = StringUtil.DoubleDotNum(src);
			String truePath = StringUtil.getTruePath(pageUrl, levels);
			String fileName = StringUtil.getAppath(src);
			return truePath + "/" + fileName;
		}
		return src;
	}

	/**
	 * Tells whether the value starts with an http or https scheme. The
	 * comparison ignores case because URL schemes are case-insensitive.
	 */
	private static boolean hasHttpScheme(String src) {
		return src.regionMatches(true, 0, "http:", 0, 5)
				|| src.regionMatches(true, 0, "https:", 0, 6);
	}

	/**
	 * Event callback; the document currently ignores every event.
	 *
	 * @param event the event delivered to this document
	 */
	@Override
	public void handleEvent(Event event) {
		// not implemented
	}

	/**
	 * Handler registration; currently a no-op, the handler is not stored.
	 *
	 * @param click        event type code
	 * @param eventhandler handler to attach
	 */
	@Override
	public void attachEventHandler(int click, EventHandler eventhandler) {
		// not implemented
	}

	/** @return the frames collected so far (live list, not a copy) */
	public ArrayList<Frame> getFrames() {
		return frames;
	}

	/** @param frames the list to use as this document's frames */
	public void setFrames(ArrayList<Frame> frames) {
		this.frames = frames;
	}

	/** @return the iframes collected so far (live list, not a copy) */
	public ArrayList<Iframe> getIframes() {
		return iframes;
	}

	/** @param iframes the list to use as this document's iframes */
	public void setIframes(ArrayList<Iframe> iframes) {
		this.iframes = iframes;
	}

}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -