⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 formoperate.java

📁 本程序可从网上利用百度搜索引擎下载和输入关键词有关的网页
💻 JAVA
字号:

package spider;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.*;


/**
 * Scans the files of a directory for HTML {@code <form>} elements and builds
 * one SQL {@code insert} statement per form. Each statement records, as 0/1
 * flags, which input types and which keywords were found in the form.
 *
 * <p>Matching is done with plain, case-sensitive substring tests; no HTML
 * parser is involved. The class keeps mutable state and does no locking.
 */
public class FormOperate
{
  /** Directory whose files are scanned by {@link #setInsertSqlList()}. */
  private String path = null;
  /** Name of the file currently being analysed; written into every generated statement. */
  private String fileName = null;
  /** A select statement stored on behalf of callers; not used inside this class. */
  private String selectSql = null;
  /** Generated insert statements; every element is a {@code String}. */
  private List insertSqlList = new ArrayList();

  /** @return the file name that is written into generated statements */
  public String getFileName()
  {
    return fileName;
  }

  /** @param fileName the file name to write into generated statements */
  public void setFileName(String fileName)
  {
    this.fileName = fileName;
  }

  /** @param path the directory to scan */
  public void setPath(String path)
  {
    this.path = path;
  }

  /** @return the directory to scan */
  public String getPath()
  {
    return this.path;
  }

  /** @param selectSql the select statement to store */
  public void setSelectSql(String selectSql)
  {
    this.selectSql = selectSql;
  }

  /** @return the stored select statement */
  public String getSelectSql()
  {
    return this.selectSql;
  }

  /**
   * Misnamed getter kept so existing callers keep compiling; prefer
   * {@link #getSelectSql()}.
   *
   * @return the stored select statement
   */
  public String setselectSql()
  {
    return getSelectSql();
  }

  /**
   * Replaces the list that collects generated statements.
   *
   * @param insertSql the new list; {@code null} is replaced by an empty list
   *                  so that a later scan can still append to it
   */
  public void setInsertSqlList(List insertSql)
  {
    this.insertSqlList = (insertSql != null) ? insertSql : new ArrayList();
  }

  /** @return the list of generated insert statements */
  public List getInsertSqlList()
  {
    return insertSqlList;
  }

  /**
   * Reads every regular file directly inside {@link #getPath()}, extracts each
   * {@code <form>} it contains and appends one insert statement per form to
   * the statement list. A file that cannot be read is reported on standard
   * output and skipped; the remaining files are still processed.
   */
  public void setInsertSqlList()
  {
    // A missing path is handled like an unreadable directory instead of
    // letting new File(null) throw.
    File[] files = (path == null) ? null : new File(path).listFiles();
    if (files == null)
    {
      System.out.println("没有文件");
      return;
    }

    for (int i = 0; i < files.length; i++)
    {
      // Sub-directories cannot be opened as text, so skip them up front.
      if (!files[i].isFile())
      {
        continue;
      }
      this.setFileName(files[i].getName());
      try
      {
        collectFormSql(readPage(files[i]));
      }
      catch (IOException e)
      {
        e.printStackTrace();
        System.out.println("fail operate file ");
      }
    }
  }

  /**
   * Analyses one form and returns the insert statement describing it.
   *
   * @param oneFormStream the text of a single form (everything after
   *                      {@code <form}); {@code null} yields a statement with
   *                      every flag set to 0
   * @return the insert statement for the {@code interface} table, never {@code null}
   */
  public String analisisForm(String oneFormStream)
  {
    // Flags derived from the type attribute of the form's controls.
    int ftexts = 0;
    int fsubmit = 0;
    int fbutton = 0;
    int fpasswords = 0;
    int fhidden = 0;
    int fimages = 0;
    int fcheckbox = 0;
    int fradio = 0;
    // Flags derived from keywords found anywhere in the form text.
    int fkeywordInForm = 0;
    int factionSemantic = 0;
    int fselectValueSemantic = 0;
    int fsubmitInForm = 0;
    int fmailInForm = 0;

    if (oneFormStream != null)
    {
      String[] typeSegments = oneFormStream.split("type=");
      // Segment 0 is the text in front of the first "type=", not a type
      // value, so the scan starts at 1.
      for (int st = 1; st < typeSegments.length; st++)
      {
        String typeValue = leadingToken(typeSegments[st]);

        if (has(typeValue, "text"))
        {
          ftexts = 1;
        }
        if (has(typeValue, "submit"))
        {
          fsubmit = 1;
        }
        if (has(typeValue, "button"))
        {
          fbutton = 1;
        }
        if (has(typeValue, "hidden"))
        {
          fhidden = 1;
        }
        if (has(typeValue, "image"))
        {
          fimages = 1;
        }
        if (has(typeValue, "checkbox"))
        {
          fcheckbox = 1;
        }
        if (has(typeValue, "radio"))
        {
          fradio = 1;
        }
        if (has(typeValue, "password"))
        {
          fpasswords = 1;
        }
      }

      if (has(oneFormStream, "keyword") || has(oneFormStream, "关键字"))
      {
        fkeywordInForm = 1;
      }
      if (has(oneFormStream, "search") || has(oneFormStream, "搜索") || has(oneFormStream, "查找"))
      {
        factionSemantic = 1;
      }
      if (has(oneFormStream, "<select"))
      {
        fselectValueSemantic = 1;
      }
      if (has(oneFormStream, "submit"))
      {
        fsubmitInForm = 1;
      }
      if (has(oneFormStream, "mail"))
      {
        fmailInForm = 1;
      }
    }

    // The method's contract is to return SQL text, so a bound parameter is
    // not possible here; doubling single quotes keeps a file name that
    // contains ' from terminating the string literal early.
    String quotedName = String.valueOf(this.getFileName()).replace("'", "''");

    return "insert into interface(filename,texts ,submit,button , passwords ,hidden,images ,checkbox ,radio,keywordInForm,actionSemantic,selectValueSemantic,submitInForm,mailInForm) values('"
        + quotedName + "'," + ftexts + " ," + fsubmit + "," + fbutton + " , " + fpasswords
        + " , " + fhidden + " , " + fimages + " , " + fcheckbox + " , " + fradio + ","
        + fkeywordInForm + "," + factionSemantic + "," + fselectValueSemantic + ","
        + fsubmitInForm + "," + fmailInForm + ")";
  }

  /**
   * Returns the part of {@code spiderHtml} between two indexes.
   *
   * @param beginIndex start index, inclusive
   * @param endIndex   end index, exclusive
   * @param spiderHtml the text to cut from
   * @return the requested substring, or a single space when the text is
   *         {@code null} or the range is invalid (a message is printed in
   *         that case)
   */
  public String pointForm(int beginIndex, int endIndex, String spiderHtml)
  {
    boolean validRange = spiderHtml != null
        && beginIndex >= 0
        && beginIndex <= endIndex
        && endIndex <= spiderHtml.length();
    if (!validRange)
    {
      System.out.println("此处有异常" + spiderHtml);
      return " ";
    }
    return spiderHtml.substring(beginIndex, endIndex);
  }

  /** Reads a whole file into a string, ending every line with CR LF; the reader is always closed. */
  private String readPage(File file) throws IOException
  {
    BufferedReader reader = new BufferedReader(new FileReader(file));
    try
    {
      StringBuffer content = new StringBuffer();
      String line;
      while ((line = reader.readLine()) != null)
      {
        content.append(line).append("\r\n");
      }
      return content.toString();
    }
    finally
    {
      reader.close();
    }
  }

  /** Splits a page at every "<form" and appends one statement per form to the statement list. */
  private void collectFormSql(String page)
  {
    if (page.indexOf("<form") == -1)
    {
      return;
    }

    String[] pieces = page.split("<form");
    // pieces[0] is the text before the first form, so it is not counted.
    int formCount = Math.max(pieces.length - 1, 0);
    System.out.println("文件字符流中有" + formCount + "个表单");

    for (int sf = 1; sf < pieces.length; sf++)
    {
      int closingTag = pieces[sf].indexOf("form>");
      // Without a closing tag the rest of the piece is taken as the form
      // instead of analysing an empty string.
      String oneForm = (closingTag != -1) ? pieces[sf].substring(0, closingTag) : pieces[sf];

      String oneSql = analisisForm(oneForm);
      if (oneSql != null)
      {
        System.out.println("形成一个oneSql语句oneSql:" + oneSql);
        insertSqlList.add(oneSql);
      }
    }
  }

  /** Returns the characters of {@code text} up to the first whitespace character or '>'. */
  private static String leadingToken(String text)
  {
    int end = 0;
    while (end < text.length())
    {
      char c = text.charAt(end);
      if (Character.isWhitespace(c) || c == '>')
      {
        break;
      }
      end++;
    }
    return text.substring(0, end);
  }

  /** Tells whether {@code text} contains {@code needle}. */
  private static boolean has(String text, String needle)
  {
    return text.indexOf(needle) != -1;
  }
}










⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -