⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 savetodatabase.java

📁 本程序可从网上利用百度搜索引擎下载和输入关键词有关的网页
💻 JAVA
字号:

package spider;
import java.util.*;
import java.sql.*;
import java.lang.*;
import java.io.*;



public class SaveToDataBase//操作数据库类
{
	private String urls=null;
	private String searchWord=null;
	private int whichUrl=0;//用于从数据库表urls中取出这个ID号的Url
	private List urlList = new ArrayList();//保存从html字符串中提取的url
	private List insertSqlList = new ArrayList();//保存了要插入的sql语句
	
	
	public List getInsertSqlList() 
	{
		return insertSqlList;
	}

	public void setInsertSqlList(List insertSqlList)
	{
		this.insertSqlList = insertSqlList;
	}
	
	public void setUrls(String urls)//设置要提取数据库
	{		
		this.urls=urls;
	}
		
	public String getUrls()//提取要提取数据库中记录的ID号
	{		
		return urls;	
	}
	
	public void setSearchWord(String searchWord)//搜索关键子的url编码,
	{		
		this.searchWord=searchWord;//java.net.URLEncoder.encode(searchWord);
	}
		
	public String getSearchWord()//提取要提取数据库中记录的ID号
	{		
		return searchWord;	
	}
	
	public void setWhichUrl(int whichUrl)//设置要提取数据库中记录的ID号
	{		
		this.whichUrl=whichUrl;
	}
		
	public int getWhichUrl()//提取要提取数据库中记录的ID号
	{		
		return whichUrl;	
	}
		
	public void setUrlList(List urlList)
	{		
		this.urlList=urlList;
	}
		
	public List getUrlList()
	{		
		return urlList;	
	}
	
	public static Connection getConnection()//连接数据库
	{
		try
		{
			 Class.forName("com.microsoft.jdbc.sqlserver.SQLServerDriver").newInstance();         
			 String  url="jdbc:microsoft:sqlserver://localhost:1433;DatabaseName=xy";       
			 String  user="sa";         
			 String  password="a";      
			 Connection  conn=  DriverManager.getConnection(url,user,password);
			 return conn;
		}
		catch(Exception e)
		{
			e.toString();
			return null;
		}
	}
	
	
	public void saveUrlToDataBase()//把urlList中的符合条件的oneUrl保存到数据库表urls中
	{
		 Connection  conn=getConnection();;
		 if( conn==null)
		 {
			 System.out.println("数据库连接失败");
		 }
		 else
		 {
		   try
		    {
			 Statement  stmt=conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,ResultSet.CONCUR_READ_ONLY);
			 if( urlList!=null)
			 {System.out.println(urlList.size());
				 for(int n=0;n<urlList.size();n++)
				 {
					 String oneUrl=(urlList.get(n)).toString();
					 
					//是具体网站连接					
						// System.out.println(oneUrl+"   is split "+oneUrl.split("/").length);
						// if( oneUrl.indexOf("s?")==-1 && oneUrl.indexOf("c?word")==-1 &&oneUrl.indexOf("f?")==-1 &&oneUrl.indexOf("q?")==-1 &&oneUrl.indexOf("m?")==-1)//oneUrl.split("/").length<=4 &&小于站内三层则插入并且没有?号
					 if(oneUrl.indexOf("baidu.com")==-1)
						 {
						       //String titleOneUrl=null;
						       System.out.println("具体网站-----"+oneUrl);
						       oneUrl=oneUrl.substring(0, oneUrl.indexOf("/",10));	//只取到第三个/,http://www.cpanet.cn后面不取				 				
						       System.out.println("首页---"+n+oneUrl);
						       
						      if(oneUrl.indexOf("/bbs.")==-1 && oneUrl.indexOf("blog")==-1 && oneUrl.indexOf(".bokee.")==-1 && oneUrl.indexOf("spaces.live.com")==-1 && oneUrl.indexOf(".edu.")==-1 && oneUrl.indexOf(".gov")==-1 && oneUrl.indexOf(".tw")==-1 && oneUrl.indexOf("lib.")==-1 && oneUrl.indexOf(".org")==-1 && oneUrl.indexOf("news")==-1 && oneUrl.indexOf(".kongfz.")==-1 && oneUrl.indexOf(".taobao.")==-1 && oneUrl.indexOf(".sohu.")==-1 && oneUrl.indexOf(".sina.")==-1)//
						       {
								   String checkSql="select * from urls where  myurl like '%"+oneUrl+"%'";//取出这个文章的地址                   
								   ResultSet checkSqlRs=stmt.executeQuery(checkSql);                                                 					 
								   if(checkSqlRs.next())                     
								    {
									   //如果表单中有,则修改出现在那个领域的次数
									   System.out.println("数据库表urls中有此url");
									   int timeNumber=checkSqlRs.getInt(searchWord);
									   int ID=checkSqlRs.getInt("ID");							 
									   timeNumber++;
									   String upDateTimeNumber="update urls set "+searchWord+"="+timeNumber+" where ID="+ID;
									   stmt.executeUpdate(upDateTimeNumber);
									   //System.out.println("数据库表urls中有此url");
								    }                    
								   else//如果表单中没有此url,则插入
								   { 							 						 
									 if(oneUrl.indexOf(".jpg")==-1 && oneUrl.indexOf(".gif")==-1 && oneUrl.indexOf(".js")==-1 && oneUrl.indexOf(".exe")==-1 && oneUrl.indexOf(".swf")==-1 &&  oneUrl.indexOf(".css")==-1)//如果没有出现这些字符的着插入
									 {
										     System.out.println(searchWord);
											 String insertSql="insert into urls(myurl,"+searchWord+",pagenumber) values('"+ oneUrl+"','1','"+whichUrl+"')" ;             			 
											 stmt.execute(insertSql);
											// System.out.println("insert urls表 myurl列 "+oneUrl); 						
									 }
									 else
									 {   
										 if(oneUrl.indexOf(".jsp")!=-1 )//在出去的内容中检查是否有含有.jsp的url
										 {
											 String insertSql="insert into urls(myurl,"+searchWord+",pagenumber) values('"+ oneUrl+"','1','"+whichUrl+"')" ;      			 
											 stmt.execute(insertSql);
											// System.out.println("insert urls表 myurl列 "+oneUrl); 	
										 }
										 else
										 {
											 //System.out.println("不是网页,没有保存到数据库表urls中");
										 }
										
									 }						 			 		 
								  }
						    	   
						       }
						       else
						       {
						    	   System.out.println("除去bbs和blog");
						       }

						 }
						 else//大于站内三层则不插入
						 {
							// System.out.println("url大于站内三层则");
						 }
 
					 }//end for
				 }//end if							  
			 stmt.close();
			 conn.close();
	     }
	    catch(Exception e)
				{
					//System.out.println( "saveToDataBase()有异常是:"); 
					e.printStackTrace();
				}
				
   }//end else			
			 
			 			   
}
	
	
	public String outPutOneUrl()//从数据库表urls中取出一个ID号whichurl的Url
	{				
		String word=null;
		
		Connection  conn=getConnection();
		 if( conn==null)
		 {
			 System.out.println("数据库连接失败");
		 }
		 else
		 {
			if(whichUrl!=0)
			{
				try
				   {       
		             Statement  stmt=conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,ResultSet.CONCUR_READ_ONLY); 
		             String checkSql="select * from "+urls+" where  ID="+whichUrl;//取出这个文章的地址                   
		             ResultSet checkSqlRs=stmt.executeQuery(checkSql);                                                 		 
		             if(checkSqlRs.next())                     
		                 {			 
			                word=checkSqlRs.getString("myurl");
			               // System.out.println("数据库中有ID为 "+whichUrl+"的url  "+word);			 
		                 }                    
		             else
		               {
		              	 //System.out.println("数据库中没有ID为"+whichUrl+"的url" );					 			 		 
		               }                                                        		           
		             stmt.close();       
		             conn.close();
		          }
				catch(Exception e)
                  {                     
                   // System.out.println( "outPutOneUrl()有异常"); 
                    e.printStackTrace();
                  }
			}
		  else
		  {
			//System.out.println("whichUrl is 0");
		  }
	 }		 
		 return word;		
	}


		
	public void operateInsertSqlList()//批量插入sql语句
	{
		Connection  conn=getConnection();
		 if( conn==null)
		 {
			 System.out.println("数据库连接失败");
		 }
		 else
		 {
			 try 
			 {
				Statement  stmt=null;
				if(insertSqlList!=null)
				{
					for(int sl=0;sl<insertSqlList.size();sl++)
					{
					    stmt=conn.createStatement();
						stmt.executeUpdate(insertSqlList.get(sl).toString());
						System.out.println("插入了一条sql");
					}
				}
				stmt.close();
				conn.close();								
			 } 
			 catch (SQLException e)
			 {			
				 System.out.println("插入sql语句出现异常");
				 e.printStackTrace();
				
			 }
		 }
	}
	
	public void operateInsertOneSql(String insertOneSql,String selectOneSql)//插入一条sql(参数)语句
	{
		Connection  conn=getConnection();
		 if( conn==null)
		 {
			 System.out.println("数据库连接失败");
		 }
		 else
		 {
			 try 
			 {
				Statement  stmt=null;
				if(insertOneSql!=null && selectOneSql!=null)
				{				
					    stmt=conn.createStatement();
					    ResultSet checkSqlRs=stmt.executeQuery(selectOneSql);                                                 		 
			             if(checkSqlRs.next())                     
			                 {			 
			            	     System.out.println("数据库中有此记录" );	
			                 }                    
			             else
			                {
								stmt.executeUpdate(insertOneSql);
								System.out.println("插入了一条sql");			              	  				 			 		 
			                }  					    				
				}
				stmt.close();
				conn.close();								
			 } 
			 catch (SQLException e)
			 {			
				 System.out.println("插入sql语句出现异常");
				 e.printStackTrace();
				
			 }
		 }
	}

	
	public Map selectOneForm(String SqlSelect)//从一个数据库表keywordinsearchinteface中取出领域关键字,放到集合List中
	{			
		Map keywordValueMap=new HashMap();//存放的是,一个Map中的键值对,一个List中是键
		Map valueMap=new HashMap();//一个Map中的键值对
		List keyWordList = new ArrayList();//一个List中是键
		Connection  conn=getConnection();
		 if( conn==null)
		 {
			 System.out.println("数据库连接失败");
		 }
		 else
		 {			
				try
				   {       
		             Statement  stmt=conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,ResultSet.CONCUR_READ_ONLY); 
		                                
		             ResultSet SqlRs=stmt.executeQuery(SqlSelect);                                                 		 
		             while(SqlRs.next())                     
		                 {			//注意把表中的两列,关键字和值放到了一块,可扩展性不好,有时间要改 
		            	    keyWordList.add(SqlRs.getString("keyword")) ;//List中是键
		            	    valueMap.put(SqlRs.getString("keyword"), new Integer(SqlRs.getInt("value")));//一个Map中的键值	对	            	    	            	    

		                 }                    
                                                        		           
		             stmt.close();       
		             conn.close();
		          }
				catch(Exception e)
                  {                     
                    System.out.println( "keywordinsearchinteface表有异常"); 
                    e.printStackTrace();
                  }
			}
		 keywordValueMap.put("keyWordList", keyWordList);
		 keywordValueMap.put("valueMap", valueMap);
		 return keywordValueMap;		
	}
	
	
	
	public void updateUrlsTable(String updateSql)
	{
		Connection  conn=getConnection();
		 if( conn==null)
		 {
			 System.out.println("数据库连接失败");
		 }
		 else
		 {			
				try
				   {       
		             Statement  stmt=conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,ResultSet.CONCUR_READ_ONLY); 		             
		             stmt.executeUpdate(updateSql) ;                                                		                                                                              		           
		             stmt.close();  
		             conn.close();
		             
		          }
				catch(Exception e)
                  {                     
                    System.out.println( "keywordinsearchinteface表有异常"); 
                    e.printStackTrace();
                  }

			}
	}
	
	public Map selectIDFromUrls(String SqlSelect)//从一个数据库表urls中取出ID,放到集合List中
	{			
		Map IDMap=new HashMap();//存放的是,一个Map中的键值对,一个List中是键
		List IDList = new ArrayList();//一个List中是键
		Connection  conn=getConnection();
		 if( conn==null)
		 {
			 System.out.println("数据库连接失败");
		 }
		 else
		 {			
				try
				   {       
		             Statement  stmt=conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,ResultSet.CONCUR_READ_ONLY); 
		                                
		             ResultSet SqlRs=stmt.executeQuery(SqlSelect);                                                 		 
		             while(SqlRs.next())                     
		                 {			//注意把表中的两列,关键字和值放到了一块,可扩展性不好,有时间要改 
		            	    IDList.add(SqlRs.getString("ID")) ;//List中是键
		                 }                    
                                                        		           
		             stmt.close();       
		             conn.close();
		          }
				catch(Exception e)
                  {                     
                    System.out.println( "操作urls表有异常"); 
                    e.printStackTrace();
                  }
			}
		 IDMap.put("IDList", IDList);
		 return IDMap;		
	}
	
	
}//end Class




⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -