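// File: getsoupu028shop.java
// (A "font size:" label from the code viewer followed here; it is viewer residue, not part of the program.)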
import java.io.*;
import java.net.*;
import java.sql.*;
import java.util.*;
/**
 * Scraper for shop listings published on www.soupu028.com.
 *
 * <p>Listing URLs are read from the {@code soupu028Url} table (rows whose
 * {@code gets} column is not 1), each page is downloaded and parsed, the
 * extracted fields are stored through the {@code add_soupu028} stored
 * procedure, and the URL is then flagged through {@code update_soupu028Gets}.
 *
 * <p>Search page the listings come from:
 * http://www.soupu028.com/search.asp?PageNo=1&xxlb=商铺门面&qy=&jzmj=&zjjg=&xxbt=&bh=####
 *
 * <p>Instances share one lazily opened JDBC connection and are not designed
 * for concurrent use.
 *
 * @author Administrator
 */
public class GetSoupu028Shop {

    /** Closing tag that terminates every extracted value. */
    private static final String FONT_END = "</font>";

    /** Marker that precedes the blue listing values (title, area, region, ...). */
    private static final String BLUE_VALUE = "#0066FF\" >";

    /** Marker that precedes the brown contact values (name, e-mail). */
    private static final String BROWN_VALUE = "#993300>";

    /** Marker that precedes the phone numbers (note the extra space). */
    private static final String PHONE_VALUE = "#993300 >";

    /*
     * NOTE(review): the trailing character of this marker and the " " removed in
     * stripMarkup look like an HTML entity (probably "&nbsp;") that was decoded
     * when the source was copied: the hard-coded offsets 24 and 23 used with
     * this marker only line up with a 23-character marker. The literals are
     * kept exactly as found - confirm against the original file.
     */
    private static final String DETAILS_MARKER = "color=\"#0066FF\" > ";

    /** Category used when no keyword rule matches. */
    private static final String DEFAULT_TYPE = "商铺租售";

    /**
     * Keyword rules in priority order: element 0 is the category, the remaining
     * elements are the keywords that select it. "快餐" is tested before the
     * broader "餐" keyword, otherwise it could never be reached.
     */
    private static final String[][] TYPE_RULES = {
        {"酒吧水吧", "酒吧", "水吧"},
        {"快餐排档", "快餐"},
        {"中西餐馆", "餐"},
        {"服装饰品", "衣服", "饰品", "服装"},
        {"快餐排档", "面馆", "小吃"},
        {"茶楼茶坊", "茶", "茶楼", "茶坊"},
        {"干洗水洗", "干洗", "水洗"},
        {"火锅汤锅", "火锅", "汤锅"},
        {"美容美发", "美容", "美发"},
        {"音像书刊", "影", "书"},
        {"网吧游戏", "网吧", "游戏"},
        {"休闲娱乐", "桑拿", "农家乐", "浴足", "瑜伽", "娱乐"},
        {"写字间", "写字楼", "办公室"},
        {"商业合作", "地产代理", "中介"},
        {"厂房库房", "厂房", "仓库", "土地"},
    };

    /** Shared JDBC connection, opened on demand by {@link #getConnection()}. */
    Connection con = null;

    // The three fields below are kept only so that existing code referring to
    // them still compiles; the methods of this class use local, properly
    // closed statements and result sets instead.
    Statement st = null;
    CallableStatement cs = null;
    ResultSet rs = null;

    /**
     * Program entry point. The body is empty: running the class does nothing.
     *
     * @param args command-line arguments (unused)
     */
    public static void main(String[] args) {
    }

    /**
     * Processes every pending URL returned by {@link #querySoupu028Url()}:
     * parses and stores the listing, then flags the URL as fetched.
     *
     * <p>A URL whose id or page cannot be parsed is reported and skipped (and
     * not flagged) instead of aborting the whole run. An {@link SQLException}
     * from flagging a URL still stops the loop. The shared connection is
     * closed when the run ends.
     */
    public void saveSoupu028Shop() {
        try {
            Vector vec = querySoupu028Url();
            for (int i = 0; i < vec.size(); i++) {
                Object item = vec.get(i);
                if (item == null) {
                    continue; // a NULL url column cannot be fetched
                }
                String url = item.toString();
                try {
                    // The listing id is expected at fixed positions 36..40 of the URL.
                    int j = Integer.parseInt(url.substring(36, 41));
                    this.saveSoupu028Info(url, j);
                } catch (RuntimeException e) {
                    System.err.println("Skipping unparseable listing " + url);
                    e.printStackTrace();
                    continue;
                }
                System.out.println(url);
                System.out.println("--------------------------------------------");
                updateUrlGets(url, "{call update_soupu028Gets(?)}", "soupu028Url");
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } finally {
            closeConnection();
        }
    }

    /**
     * Downloads one listing page, extracts its fields and stores them unless a
     * listing with the same title is already present.
     *
     * @param url address of the listing page
     * @param j   numeric id, passed to {@link #savePumian} as both {@code t4g} and {@code i}
     * @throws StringIndexOutOfBoundsException if the page does not contain the expected markers
     */
    public void saveSoupu028Info(String url, int j) {
        String html = getHTML(url);
        String city = "成都";

        // Everything of interest sits between the first <TBODY> and </TBODY>.
        String opStr = html.substring(html.indexOf("<TBODY>"), html.indexOf("</TBODY>"));

        String title = fontValue(opStr, BLUE_VALUE);
        String transWay = fontValue(after(opStr, "交易性质", 0), BLUE_VALUE);
        int area = parseLeadingInt(fontValue(after(opStr, "建筑面积", 0), BLUE_VALUE));
        String region = fontValue(after(opStr, "所属区县", 0), BLUE_VALUE);
        String place = fontValue(after(opStr, "所在位置", 0), BLUE_VALUE);

        // The contact name is the second brown value: skip 20 characters past the first marker.
        String name = fontValue(after(opStr, BROWN_VALUE, 20), BROWN_VALUE);

        // Two phone values: the first and the last one after the width="181" cell.
        String numberStr = after(opStr, "width=\"181\">", 0);
        String lastNumberStr = numberStr.substring(numberStr.lastIndexOf(PHONE_VALUE));
        String number = fontValue(numberStr, PHONE_VALUE) + " " + fontValue(lastNumberStr, PHONE_VALUE);

        String email = fontValue(after(opStr, "#993300>邮", 50), BROWN_VALUE);

        // Details come in two parts: after the first marker up to the first
        // </font>, and after the last marker up to the last </font>.
        String detailsStr = after(opStr, DETAILS_MARKER, 0);
        int lastMarker = detailsStr.lastIndexOf(DETAILS_MARKER);
        String details = stripMarkup(detailsStr.substring(24, detailsStr.indexOf(FONT_END)));
        String details2 = stripMarkup(
                detailsStr.substring(lastMarker + 23, detailsStr.lastIndexOf(FONT_END)));

        String sql = "{call add_soupu028(?,?,?,?,?,?,?,?,?,?,?,?)}";
        try {
            Vector nameVec = querySoupu028ShopName();
            if (!nameVec.contains(title)
                    && title.length() > 0
                    && number.length() > 0
                    && (details.length() + details2.length()) > 0) {
                System.out.println("-------------------开始---------------------");
                System.out.println("信息主题:"+title.trim());
                System.out.println("--------------------------------------------");
                System.out.println("转让方式:"+transWay.trim());
                System.out.println("--------------------------------------------");
                System.out.println("建筑面积:"+area);
                System.out.println("--------------------------------------------");
                System.out.println("所属区县:"+region.trim());
                System.out.println("--------------------------------------------");
                System.out.println("所在位置:"+place.trim());
                System.out.println("--------------------------------------------");
                System.out.println("联系人:"+name.trim());
                System.out.println("--------------------------------------------");
                System.out.println("电话:"+number.trim());
                System.out.println("--------------------------------------------");
                System.out.println("联系人邮件:"+email.trim());
                System.out.println("--------------------------------------------");
                System.out.println("详细内容:"+details.trim()+details2.trim());
                System.out.println("--------------------结束--------------------");
                // NOTE(review): place is passed in the transWay slot and transWay in
                // the place slot of savePumian. The order is kept as found because the
                // parameter order expected by add_soupu028 is not visible here - confirm.
                savePumian(city, region, getType(title, title), title, number, email,
                        details + details2, j, area, place, transWay, j, sql);
            }
        } catch (SQLException e) {
            e.printStackTrace();
        }
    }

    /**
     * Classifies a listing by keyword.
     *
     * <p>All rules are tried against {@code name} first; only if none matches
     * are they tried against {@code range}. A {@code null} argument is treated
     * as containing no keyword.
     *
     * @param name  primary text to classify
     * @param range fallback text to classify
     * @return the matching category, or "商铺租售" when nothing matches
     */
    public static String getType(String name, String range) {
        String type = matchType(name);
        if (type == null) {
            type = matchType(range);
        }
        return type != null ? type : DEFAULT_TYPE;
    }

    /** Returns the category of the first rule with a keyword found in {@code text}, or null. */
    private static String matchType(String text) {
        if (text == null) {
            return null;
        }
        for (int r = 0; r < TYPE_RULES.length; r++) {
            String[] rule = TYPE_RULES[r];
            for (int k = 1; k < rule.length; k++) {
                if (text.indexOf(rule[k]) > -1) {
                    return rule[0];
                }
            }
        }
        return null;
    }

    /**
     * Downloads a page and returns it as one string, with every line trimmed
     * and the line breaks removed.
     *
     * <p>The download is attempted twice. The first failure is ignored; the
     * second one is printed and an empty string is returned. The page is
     * decoded with the platform default charset.
     *
     * @param url address of the page
     * @return the page text, or an empty string if both attempts fail
     */
    public static String getHTML(String url) {
        for (int attempt = 1; attempt <= 2; attempt++) {
            try {
                return fetch(url);
            } catch (Exception e) {
                if (attempt == 2) {
                    e.printStackTrace();
                }
            }
        }
        return "";
    }

    /** Performs one download attempt; the reader is always closed. */
    private static String fetch(String url) throws IOException {
        HttpURLConnection connection = (HttpURLConnection) new URL(url).openConnection();
        BufferedReader in = new BufferedReader(new InputStreamReader(connection.getInputStream()));
        try {
            StringBuffer page = new StringBuffer();
            String line;
            while ((line = in.readLine()) != null) {
                page.append(line.trim());
            }
            return page.toString();
        } finally {
            in.close();
        }
    }

    /**
     * Returns the URLs from {@code soupu028Url} whose {@code gets} column is not 1,
     * ordered by id.
     *
     * @return the URLs as strings; empty if the query fails (the error message is printed)
     */
    public Vector querySoupu028Url() {
        return queryStringColumn("select id,url from soupu028Url where gets!=1 order by id", 2);
    }

    /**
     * Returns the {@code name} column of every row in {@code soupu028}.
     *
     * @return the names as strings; empty if the query fails (the error message is printed)
     */
    public Vector querySoupu028ShopName() {
        // The query selects a single column, so the value is at index 1.
        return queryStringColumn("select name from soupu028", 1);
    }

    /**
     * Flags a URL through the given stored procedure.
     *
     * <p>The row id is looked up by URL in {@code table} and passed as the only
     * argument of {@code update}. If no row matches, the procedure is not
     * called and a message is written to standard error.
     *
     * @param url    URL to look up
     * @param update JDBC call string taking one integer parameter
     * @param table  table holding {@code id} and {@code url}; it is concatenated
     *               into the SQL text, so it must be a trusted identifier
     * @throws SQLException if no connection is available or a statement fails
     */
    public void updateUrlGets(String url, String update, String table) throws SQLException {
        Connection connection = requireConnection();
        PreparedStatement lookup = null;
        ResultSet ids = null;
        CallableStatement call = null;
        try {
            lookup = connection.prepareStatement("select id from " + table + " where url=?");
            lookup.setString(1, url);
            ids = lookup.executeQuery();
            if (!ids.next()) {
                System.err.println("No row in " + table + " for url " + url);
                return;
            }
            int id = ids.getInt(1);
            call = connection.prepareCall(update);
            call.setInt(1, id);
            System.out.println("ID======" + id);
            call.executeUpdate();
        } finally {
            closeQuietly(ids, lookup);
            closeQuietly(null, call);
        }
    }

    /**
     * Stores one listing by binding the arguments, in declaration order, to
     * parameters 1 to 12 of the given call.
     *
     * @param city     bound to parameter 1
     * @param region   bound to parameter 2
     * @param type     bound to parameter 3 (category computed by the caller)
     * @param name     bound to parameter 4
     * @param number   bound to parameter 5
     * @param email    bound to parameter 6
     * @param details  bound to parameter 7
     * @param t4g      bound to parameter 8
     * @param area     bound to parameter 9
     * @param transWay bound to parameter 10
     * @param place    bound to parameter 11
     * @param i        bound to parameter 12
     * @param sql      JDBC call string with twelve parameters
     * @throws SQLException if no connection is available or the call fails
     */
    public void savePumian(String city, String region, String type, String name, String number, String email,
            String details, int t4g, int area, String transWay, String place, int i, String sql) throws SQLException {
        Connection connection = requireConnection();
        CallableStatement call = null;
        try {
            call = connection.prepareCall(sql);
            call.setString(1, city);
            call.setString(2, region);
            call.setString(3, type);
            call.setString(4, name);
            call.setString(5, number);
            call.setString(6, email);
            call.setString(7, details);
            call.setInt(8, t4g);
            call.setInt(9, area);
            call.setString(10, transWay);
            call.setString(11, place);
            call.setInt(12, i);
            call.executeUpdate();
        } finally {
            closeQuietly(null, call);
        }
    }

    /**
     * Returns the shared database connection, opening it if there is none or
     * if the previous one has been closed.
     *
     * @return the connection, or {@code null} if it cannot be opened (the
     *         error message is written to standard error)
     */
    public Connection getConnection() {
        try {
            if (con != null && !con.isClosed()) {
                return con;
            }
        } catch (SQLException ignored) {
            // The state of the old connection cannot be determined; open a new one.
        }
        con = null;
        String sDBDriver = "com.microsoft.jdbc.sqlserver.SQLServerDriver";
        String DBCon = "jdbc:microsoft:sqlserver://192.168.1.18:1433;DatabaseName=balan";
        try {
            Class.forName(sDBDriver);
            // NOTE(review): credentials are hard-coded; consider moving them to configuration.
            con = DriverManager.getConnection(DBCon, "sa", "jpw2002");
        } catch (Exception e) {
            System.err.println("Unable to load driver:" + e.getMessage());
        }
        return con;
    }

    /**
     * Closes the shared connection if it is open. A later call to
     * {@link #getConnection()} opens a new one.
     */
    public void closeConnection() {
        if (con != null) {
            try {
                con.close();
            } catch (SQLException e) {
                System.err.println("Unable to close connection:" + e.getMessage());
            }
            con = null;
        }
    }

    /**
     * Parses the run of ASCII digits at the start of {@code text}.
     *
     * @param text text such as "120平方米"; may be {@code null}
     * @return the parsed number, or 0 if the text does not start with a digit
     *         or the digits do not fit in an {@code int}
     */
    static int parseLeadingInt(String text) {
        if (text == null) {
            return 0;
        }
        int end = 0;
        while (end < text.length() && text.charAt(end) >= '0' && text.charAt(end) <= '9') {
            end++;
        }
        if (end == 0) {
            return 0;
        }
        try {
            return Integer.parseInt(text.substring(0, end));
        } catch (NumberFormatException e) {
            return 0; // more digits than an int can hold
        }
    }

    /** Returns the part of {@code text} that starts {@code skip} characters after the start of {@code anchor}. */
    private static String after(String text, String anchor, int skip) {
        int at = text.indexOf(anchor);
        if (at < 0) {
            throw new StringIndexOutOfBoundsException("Marker not found: " + anchor);
        }
        return text.substring(at + skip);
    }

    /** Returns the text between the first {@code marker} and the next closing font tag. */
    private static String fontValue(String text, String marker) {
        int at = text.indexOf(marker);
        if (at < 0) {
            throw new StringIndexOutOfBoundsException("Marker not found: " + marker);
        }
        int from = at + marker.length();
        int to = text.indexOf(FONT_END, from);
        if (to < 0) {
            throw new StringIndexOutOfBoundsException("No " + FONT_END + " after marker: " + marker);
        }
        return text.substring(from, to);
    }

    /** Removes the line-break tags and the filler character from a details fragment. */
    private static String stripMarkup(String text) {
        return text.replaceAll("<br>", "").replaceAll(" ", "");
    }

    /** Runs a query and collects one column as strings; failures are printed and yield what was read so far. */
    private Vector queryStringColumn(String sql, int column) {
        Vector values = new Vector();
        Statement statement = null;
        ResultSet results = null;
        try {
            statement = requireConnection().createStatement();
            results = statement.executeQuery(sql);
            while (results.next()) {
                values.add(results.getString(column));
            }
        } catch (Exception e) {
            System.out.println(e.getMessage());
        } finally {
            closeQuietly(results, statement);
        }
        return values;
    }

    /** Returns the shared connection or fails with an SQLException instead of a later NullPointerException. */
    private Connection requireConnection() throws SQLException {
        Connection connection = getConnection();
        if (connection == null) {
            throw new SQLException("Database connection unavailable");
        }
        return connection;
    }

    /** Closes a result set and a statement (either may be null), ignoring close failures. */
    private static void closeQuietly(ResultSet results, Statement statement) {
        if (results != null) {
            try {
                results.close();
            } catch (SQLException ignored) {
                // Nothing useful can be done about a failed close.
            }
        }
        if (statement != null) {
            try {
                statement.close();
            } catch (SQLException ignored) {
                // Nothing useful can be done about a failed close.
            }
        }
    }
}
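/*
 * Code-viewer residue (not part of the program): keyboard shortcuts
 *   Copy code:         Ctrl + C
 *   Search code:       Ctrl + F
 *   Full-screen mode:  F11
 *   Switch theme:      Ctrl + Shift + D
 *   Show shortcuts:    ?
 *   Larger font:       Ctrl + =
 *   Smaller font:      Ctrl + -
 */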