📄 savetodatabase.java
字号:
package spider;
import java.util.*;
import java.sql.*;
import java.lang.*;
import java.io.*;
public class SaveToDataBase//操作数据库类
{
private String urls=null;
private String searchWord=null;
private int whichUrl=0;//用于从数据库表urls中取出这个ID号的Url
private List urlList = new ArrayList();//保存从html字符串中提取的url
private List insertSqlList = new ArrayList();//保存了要插入的sql语句
public List getInsertSqlList()
{
return insertSqlList;
}
public void setInsertSqlList(List insertSqlList)
{
this.insertSqlList = insertSqlList;
}
public void setUrls(String urls)//设置要提取数据库
{
this.urls=urls;
}
public String getUrls()//提取要提取数据库中记录的ID号
{
return urls;
}
public void setSearchWord(String searchWord)//搜索关键子的url编码,
{
this.searchWord=searchWord;//java.net.URLEncoder.encode(searchWord);
}
public String getSearchWord()//提取要提取数据库中记录的ID号
{
return searchWord;
}
public void setWhichUrl(int whichUrl)//设置要提取数据库中记录的ID号
{
this.whichUrl=whichUrl;
}
public int getWhichUrl()//提取要提取数据库中记录的ID号
{
return whichUrl;
}
public void setUrlList(List urlList)
{
this.urlList=urlList;
}
public List getUrlList()
{
return urlList;
}
public static Connection getConnection()//连接数据库
{
try
{
Class.forName("com.microsoft.jdbc.sqlserver.SQLServerDriver").newInstance();
String url="jdbc:microsoft:sqlserver://localhost:1433;DatabaseName=xy";
String user="sa";
String password="a";
Connection conn= DriverManager.getConnection(url,user,password);
return conn;
}
catch(Exception e)
{
e.toString();
return null;
}
}
public void saveUrlToDataBase()//把urlList中的符合条件的oneUrl保存到数据库表urls中
{
Connection conn=getConnection();;
if( conn==null)
{
System.out.println("数据库连接失败");
}
else
{
try
{
Statement stmt=conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,ResultSet.CONCUR_READ_ONLY);
if( urlList!=null)
{System.out.println(urlList.size());
for(int n=0;n<urlList.size();n++)
{
String oneUrl=(urlList.get(n)).toString();
//是具体网站连接
// System.out.println(oneUrl+" is split "+oneUrl.split("/").length);
// if( oneUrl.indexOf("s?")==-1 && oneUrl.indexOf("c?word")==-1 &&oneUrl.indexOf("f?")==-1 &&oneUrl.indexOf("q?")==-1 &&oneUrl.indexOf("m?")==-1)//oneUrl.split("/").length<=4 &&小于站内三层则插入并且没有?号
if(oneUrl.indexOf("baidu.com")==-1)
{
//String titleOneUrl=null;
System.out.println("具体网站-----"+oneUrl);
oneUrl=oneUrl.substring(0, oneUrl.indexOf("/",10)); //只取到第三个/,http://www.cpanet.cn后面不取
System.out.println("首页---"+n+oneUrl);
if(oneUrl.indexOf("/bbs.")==-1 && oneUrl.indexOf("blog")==-1 && oneUrl.indexOf(".bokee.")==-1 && oneUrl.indexOf("spaces.live.com")==-1 && oneUrl.indexOf(".edu.")==-1 && oneUrl.indexOf(".gov")==-1 && oneUrl.indexOf(".tw")==-1 && oneUrl.indexOf("lib.")==-1 && oneUrl.indexOf(".org")==-1 && oneUrl.indexOf("news")==-1 && oneUrl.indexOf(".kongfz.")==-1 && oneUrl.indexOf(".taobao.")==-1 && oneUrl.indexOf(".sohu.")==-1 && oneUrl.indexOf(".sina.")==-1)//
{
String checkSql="select * from urls where myurl like '%"+oneUrl+"%'";//取出这个文章的地址
ResultSet checkSqlRs=stmt.executeQuery(checkSql);
if(checkSqlRs.next())
{
//如果表单中有,则修改出现在那个领域的次数
System.out.println("数据库表urls中有此url");
int timeNumber=checkSqlRs.getInt(searchWord);
int ID=checkSqlRs.getInt("ID");
timeNumber++;
String upDateTimeNumber="update urls set "+searchWord+"="+timeNumber+" where ID="+ID;
stmt.executeUpdate(upDateTimeNumber);
//System.out.println("数据库表urls中有此url");
}
else//如果表单中没有此url,则插入
{
if(oneUrl.indexOf(".jpg")==-1 && oneUrl.indexOf(".gif")==-1 && oneUrl.indexOf(".js")==-1 && oneUrl.indexOf(".exe")==-1 && oneUrl.indexOf(".swf")==-1 && oneUrl.indexOf(".css")==-1)//如果没有出现这些字符的着插入
{
System.out.println(searchWord);
String insertSql="insert into urls(myurl,"+searchWord+",pagenumber) values('"+ oneUrl+"','1','"+whichUrl+"')" ;
stmt.execute(insertSql);
// System.out.println("insert urls表 myurl列 "+oneUrl);
}
else
{
if(oneUrl.indexOf(".jsp")!=-1 )//在出去的内容中检查是否有含有.jsp的url
{
String insertSql="insert into urls(myurl,"+searchWord+",pagenumber) values('"+ oneUrl+"','1','"+whichUrl+"')" ;
stmt.execute(insertSql);
// System.out.println("insert urls表 myurl列 "+oneUrl);
}
else
{
//System.out.println("不是网页,没有保存到数据库表urls中");
}
}
}
}
else
{
System.out.println("除去bbs和blog");
}
}
else//大于站内三层则不插入
{
// System.out.println("url大于站内三层则");
}
}//end for
}//end if
stmt.close();
conn.close();
}
catch(Exception e)
{
//System.out.println( "saveToDataBase()有异常是:");
e.printStackTrace();
}
}//end else
}
public String outPutOneUrl()//从数据库表urls中取出一个ID号whichurl的Url
{
String word=null;
Connection conn=getConnection();
if( conn==null)
{
System.out.println("数据库连接失败");
}
else
{
if(whichUrl!=0)
{
try
{
Statement stmt=conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,ResultSet.CONCUR_READ_ONLY);
String checkSql="select * from "+urls+" where ID="+whichUrl;//取出这个文章的地址
ResultSet checkSqlRs=stmt.executeQuery(checkSql);
if(checkSqlRs.next())
{
word=checkSqlRs.getString("myurl");
// System.out.println("数据库中有ID为 "+whichUrl+"的url "+word);
}
else
{
//System.out.println("数据库中没有ID为"+whichUrl+"的url" );
}
stmt.close();
conn.close();
}
catch(Exception e)
{
// System.out.println( "outPutOneUrl()有异常");
e.printStackTrace();
}
}
else
{
//System.out.println("whichUrl is 0");
}
}
return word;
}
public void operateInsertSqlList()//批量插入sql语句
{
Connection conn=getConnection();
if( conn==null)
{
System.out.println("数据库连接失败");
}
else
{
try
{
Statement stmt=null;
if(insertSqlList!=null)
{
for(int sl=0;sl<insertSqlList.size();sl++)
{
stmt=conn.createStatement();
stmt.executeUpdate(insertSqlList.get(sl).toString());
System.out.println("插入了一条sql");
}
}
stmt.close();
conn.close();
}
catch (SQLException e)
{
System.out.println("插入sql语句出现异常");
e.printStackTrace();
}
}
}
public void operateInsertOneSql(String insertOneSql,String selectOneSql)//插入一条sql(参数)语句
{
Connection conn=getConnection();
if( conn==null)
{
System.out.println("数据库连接失败");
}
else
{
try
{
Statement stmt=null;
if(insertOneSql!=null && selectOneSql!=null)
{
stmt=conn.createStatement();
ResultSet checkSqlRs=stmt.executeQuery(selectOneSql);
if(checkSqlRs.next())
{
System.out.println("数据库中有此记录" );
}
else
{
stmt.executeUpdate(insertOneSql);
System.out.println("插入了一条sql");
}
}
stmt.close();
conn.close();
}
catch (SQLException e)
{
System.out.println("插入sql语句出现异常");
e.printStackTrace();
}
}
}
public Map selectOneForm(String SqlSelect)//从一个数据库表keywordinsearchinteface中取出领域关键字,放到集合List中
{
Map keywordValueMap=new HashMap();//存放的是,一个Map中的键值对,一个List中是键
Map valueMap=new HashMap();//一个Map中的键值对
List keyWordList = new ArrayList();//一个List中是键
Connection conn=getConnection();
if( conn==null)
{
System.out.println("数据库连接失败");
}
else
{
try
{
Statement stmt=conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,ResultSet.CONCUR_READ_ONLY);
ResultSet SqlRs=stmt.executeQuery(SqlSelect);
while(SqlRs.next())
{ //注意把表中的两列,关键字和值放到了一块,可扩展性不好,有时间要改
keyWordList.add(SqlRs.getString("keyword")) ;//List中是键
valueMap.put(SqlRs.getString("keyword"), new Integer(SqlRs.getInt("value")));//一个Map中的键值 对
}
stmt.close();
conn.close();
}
catch(Exception e)
{
System.out.println( "keywordinsearchinteface表有异常");
e.printStackTrace();
}
}
keywordValueMap.put("keyWordList", keyWordList);
keywordValueMap.put("valueMap", valueMap);
return keywordValueMap;
}
public void updateUrlsTable(String updateSql)
{
Connection conn=getConnection();
if( conn==null)
{
System.out.println("数据库连接失败");
}
else
{
try
{
Statement stmt=conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,ResultSet.CONCUR_READ_ONLY);
stmt.executeUpdate(updateSql) ;
stmt.close();
conn.close();
}
catch(Exception e)
{
System.out.println( "keywordinsearchinteface表有异常");
e.printStackTrace();
}
}
}
public Map selectIDFromUrls(String SqlSelect)//从一个数据库表urls中取出ID,放到集合List中
{
Map IDMap=new HashMap();//存放的是,一个Map中的键值对,一个List中是键
List IDList = new ArrayList();//一个List中是键
Connection conn=getConnection();
if( conn==null)
{
System.out.println("数据库连接失败");
}
else
{
try
{
Statement stmt=conn.createStatement(ResultSet.TYPE_SCROLL_SENSITIVE,ResultSet.CONCUR_READ_ONLY);
ResultSet SqlRs=stmt.executeQuery(SqlSelect);
while(SqlRs.next())
{ //注意把表中的两列,关键字和值放到了一块,可扩展性不好,有时间要改
IDList.add(SqlRs.getString("ID")) ;//List中是键
}
stmt.close();
conn.close();
}
catch(Exception e)
{
System.out.println( "操作urls表有异常");
e.printStackTrace();
}
}
IDMap.put("IDList", IDList);
return IDMap;
}
}//end Class
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -