⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 爬虫表.sql

📁 本程序可利用百度搜索引擎,从网上下载与所输入关键词相关的网页
💻 SQL
📖 第 1 页 / 共 2 页
字号:

-- Everything below runs against the xy database.
use xy;


-- Reset helpers: uncomment the lines you need to drop tables before
-- re-creating them.
-- drop table baidu;
-- drop table urls;
-- drop table keywordinsearchinterface;
-- drop table searchinterfacekeyword;
-- drop table keywordinpage;



-- urls: stores the individual web pages that were found.
-- NOTE(review): the flag/counter columns are varchar(20), yet queries in this
-- script compare them with integers (e.g. keneng=1, pagenumber>=35), so they
-- presumably hold numeric text -- confirm before changing any types.
create table urls
(
    ID         int identity(1,1) primary key,

    -- site home page
    myurl      varchar(200),

    -- book (search) interface
    zhen       varchar(20),

    -- has a form, but the form lacks words such as "book title"/"publisher";
    -- the page does mention "bookstore"/"book library", so it may be a book interface
    keneng     varchar(20),

    -- page has a text box and the word "search", and mentions
    -- "bookstore"/"book library", so it may be a book interface
    xun        varchar(20),

    -- search keywords (one column per keyword)
    西游记     varchar(20),
    围城       varchar(20),
    易中天     varchar(20),

    -- page number within the Baidu result pages
    pagenumber varchar(20),

    bbs        varchar(20),
    edu        varchar(20),
    gov        varchar(20),
    org        varchar(20),
    blog       varchar(20),
    bookee     varchar(20),
    news       varchar(20),
    lib        varchar(20)
);

-- Keyword columns that were tried earlier and are currently disabled:
--   民法, 本草纲目, java面向对象编程, 书 (varchar(200)), 书店, 图书, 文学,
--   计算机, 法律, 医药   (varchar(20) unless noted)

select * from urls;

-- baidu: stores the "next page" links of the Baidu result pages.
create table baidu
(
    ID    int identity(1,1) primary key,
    myurl varchar(200)    -- address of the next page
);

select * from baidu;

-- keywordinsearchinterface: feature words found in a book search interface,
-- each stored with an integer `value` (presumably a score/weight -- confirm
-- against the crawler code).
create table keywordinsearchinterface
(
    ID      int identity(1,1) primary key,
    keyword varchar(20),
    value   int
);

-- Column lists are explicit so the inserts do not depend on column order.
-- Rows are inserted one at a time, in this order, so the identity values
-- follow the order shown here.
insert into keywordinsearchinterface (keyword, value) values ('书名', 9);
insert into keywordinsearchinterface (keyword, value) values ('作者', 5);  -- original note: remove spaces in the form
insert into keywordinsearchinterface (keyword, value) values ('译者', 5);
insert into keywordinsearchinterface (keyword, value) values ('isbn', 9);
insert into keywordinsearchinterface (keyword, value) values ('著者', 5);
insert into keywordinsearchinterface (keyword, value) values ('出版社', 5);
insert into keywordinsearchinterface (keyword, value) values ('编号', 9);


--drop table keywordinsearchinterface

--update keywordinsearchinterface set keyword='kkkkkkkk' where id=5

select * from keywordinsearchinterface;


-- The create table above already declares `value`, so an unconditional
-- "add value int" fails with a duplicate-column error. Add the column only
-- when an older copy of the table does not have it yet.
if col_length('keywordinsearchinterface', 'value') is null
    alter table keywordinsearchinterface add value int;






-- searchinterfacekeyword: feature words of a search interface.
-- NOTE(review): `value` is varchar(20) here but int in the sibling keyword
-- tables; the type is left unchanged because callers are not visible.
create table searchinterfacekeyword
(
    ID      int identity(1,1) primary key,
    keyword varchar(20),
    value   varchar(20)
);

-- The original wrote "searchinterfacekeyword(1)", which is not valid: the
-- column list must name columns. Each statement supplies a single string,
-- so the target column is keyword; value is filled by the update below.
insert into searchinterfacekeyword (keyword) values ('搜');
insert into searchinterfacekeyword (keyword) values ('查');
insert into searchinterfacekeyword (keyword) values ('search');
insert into searchinterfacekeyword (keyword) values ('keyword');
insert into searchinterfacekeyword (keyword) values ('go');
insert into searchinterfacekeyword (keyword) values ('检');
insert into searchinterfacekeyword (keyword) values ('索');


-- Intentionally unfiltered: every row receives the same value.
update searchinterfacekeyword set value=10;

select * from searchinterfacekeyword;


-- `value` is already declared by the create table above, so an unconditional
-- add fails with a duplicate-column error. Only add it to an older copy of
-- the table that lacks the column.
-- NOTE(review): this adds the column as int while the create table uses
-- varchar(20) -- confirm which type is intended.
if col_length('searchinterfacekeyword', 'value') is null
    alter table searchinterfacekeyword add value int;





-- keywordinpage: words looked for in a page's content, each stored with an
-- integer `value` (presumably a score/weight -- confirm against the crawler).
create table keywordinpage
(
    ID      int identity(1,1) primary key,
    keyword varchar(20),
    value   int
);

-- Column lists are explicit so the inserts do not depend on column order.
-- Bookstore-related words, inserted with value 5.
insert into keywordinpage (keyword, value) values ('图书', 5);
insert into keywordinpage (keyword, value) values ('书店', 5);
insert into keywordinpage (keyword, value) values ('书坊', 5);
insert into keywordinpage (keyword, value) values ('书网', 5);
insert into keywordinpage (keyword, value) values ('书籍', 5);
insert into keywordinpage (keyword, value) values ('书城', 5);
insert into keywordinpage (keyword, value) values ('热门新书', 5);
insert into keywordinpage (keyword, value) values ('上架新书', 5);
insert into keywordinpage (keyword, value) values ('新书上架', 5);
insert into keywordinpage (keyword, value) values ('图书检索', 5);
insert into keywordinpage (keyword, value) values ('新书推荐', 5);
insert into keywordinpage (keyword, value) values ('图书分类', 5);
insert into keywordinpage (keyword, value) values ('图书详细分类', 5);
-- Subject/category words, inserted with value 1.
insert into keywordinpage (keyword, value) values ('文学', 1);
insert into keywordinpage (keyword, value) values ('法律', 1);
insert into keywordinpage (keyword, value) values ('计算机', 1);
insert into keywordinpage (keyword, value) values ('医学', 1);
insert into keywordinpage (keyword, value) values ('经济', 1);
insert into keywordinpage (keyword, value) values ('管理', 1);
insert into keywordinpage (keyword, value) values ('建筑', 1);
insert into keywordinpage (keyword, value) values ('外语', 1);
insert into keywordinpage (keyword, value) values ('教育', 1);
insert into keywordinpage (keyword, value) values ('少儿', 1);

--drop table keywordinpage

-- NOTE(review): unfiltered update -- it sets value to 5 on every row,
-- including the rows inserted with 1 just above. Run it only if that reset
-- is really wanted.
update keywordinpage set value=5;

select * from keywordinpage;


-- `value` is already declared by the create table above, so an unconditional
-- add fails with a duplicate-column error. Only add it to an older copy of
-- the table that lacks the column.
if col_length('keywordinpage', 'value') is null
    alter table keywordinpage add value int;





-- serviceinpage: shop/service related words looked for in a page, each
-- stored with an integer `value`.
create table serviceinpage
(
    ID      int identity(1,1) primary key,
    keyword varchar(20),
    value   int
);

-- One row per word, all inserted with value 5.
insert into serviceinpage (keyword, value) values ('订单', 5);
insert into serviceinpage (keyword, value) values ('付款', 5);
insert into serviceinpage (keyword, value) values ('汇款', 5);
insert into serviceinpage (keyword, value) values ('配送', 5);
insert into serviceinpage (keyword, value) values ('购买', 5);
insert into serviceinpage (keyword, value) values ('订购', 5);
insert into serviceinpage (keyword, value) values ('购书', 5);
insert into serviceinpage (keyword, value) values ('支付', 5);
insert into serviceinpage (keyword, value) values ('求购', 5);
insert into serviceinpage (keyword, value) values ('定价', 5);
insert into serviceinpage (keyword, value) values ('购物指南', 5);
insert into serviceinpage (keyword, value) values ('送货方式', 5);
insert into serviceinpage (keyword, value) values ('订购商品', 5);
insert into serviceinpage (keyword, value) values ('图书求购', 5);
insert into serviceinpage (keyword, value) values ('会员价', 5);
insert into serviceinpage (keyword, value) values ('折扣价', 5);
insert into serviceinpage (keyword, value) values ('特价图书', 5);
insert into serviceinpage (keyword, value) values ('购物车', 5);
insert into serviceinpage (keyword, value) values ('售后服务', 5);
insert into serviceinpage (keyword, value) values ('配送范围', 5);
insert into serviceinpage (keyword, value) values ('购买流程', 5);
insert into serviceinpage (keyword, value) values ('价格比较', 5);

-- Whole-table update on purpose: puts value 5 on every row.
update serviceinpage set value = 5;

--drop table serviceinpage

select * from serviceinpage;

-- Add the 书店 column to urls only when it is not there yet, so the statement
-- can be re-run without a duplicate-column error.
if col_length(N'urls', N'书店') is null
    alter table urls add 书店 int;


-- NOTE(review): 书网上店 and 红楼梦 are not columns of urls as created in this
-- script; presumably they were added to the live table separately -- verify
-- before running.
update urls set 书网上店=0 where 红楼梦=1; --ID>=3525 and ID<=3770


-- Rows with ID 1000..1890 whose keneng flag is 1.
select *
from urls
where keneng = 1
  and ID between 1000 and 1890;


-- Rows from ID 1833 onwards flagged zhen or xun, in ID order.
select *
from urls
where ID >= 1833
  and (zhen = 1 or xun = 1)
order by ID;


-- NOTE(review): qiannian is not created anywhere in this script; presumably
-- it has the same layout as urls plus its own keyword columns -- verify.
select *
from qiannian
where (红色季风 >= 1 and 易中天品三国 >= 1)
  and (zhen = 1 or xun = 1);


select *
from qiannian
where (zhen = 1 or xun = 1)
  and ID between 743 and 3101
  and 围城 >= 1;


-- Clear both flags on every row whose URL matches one of the excluded
-- patterns (blogs, forums, .edu/.gov/.org, bokee, news, lib., .tw).
-- One statement covers all patterns; the same rows end up with zhen=0, xun=0.
update qiannian
set zhen = 0,
    xun = 0
where myurl like '%blog%'
   or myurl like '%bbs%'
   or myurl like '%.edu.%'
   or myurl like '%.gov.%'
   or myurl like '%.org%'
   or myurl like '%bokee%'
   or myurl like '%news%'
   or myurl like '%lib.%'
   or myurl like '%.tw%';


-- Manual corrections of single rows in urls.

-- Clear the zhen flag on row 1194 (only if it is currently set).
update urls set zhen=0 where ID=1194 and zhen=1;


select * from urls where zhen=1 and myurl like '%copies.sinoshu%';


-- Mark row 1306 as a possible book interface.
update urls set keneng=1 where ID=1306;


-- TODO: the original statement was "update urls set zhen= where ID=1252",
-- with no value after "zhen=". That is a syntax error, which keeps the batch
-- it is part of from parsing. The intended value is not recoverable from
-- this script, so the statement is disabled until it is filled in.
-- update urls set zhen=<0 or 1> where ID=1252



-- Number of zhen-flagged rows found on Baidu result pages 35..39.
select count(*)
from urls
where zhen = 1
  and pagenumber between 35 and 39;

-- Flagged rows whose URL contains ".edu.".
select *
from urls
where (zhen = 1 or xun = 1)
  and myurl like '%.edu.%';

-- Rows with ID 745..1256 where the 易中天 column is at least 1.
select count(*)
from urls
where ID between 745 and 1256
  and 易中天 >= 1;

select count(*)
from urls
where ID > 694
  and keneng = 1;

-- Count, then list, the zhen-flagged rows after ID 1380.
select count(*)
from urls
where ID > 1380
  and zhen = 1;

select *
from urls
where ID > 1380
  and zhen = 1;


-- Rows whose URL contains "bbs" (flag filter currently disabled).
select *
from urls
where myurl like '%bbs%'; --and (zhen=1 or xun=1)






-- Clear the zhen flag on every row from ID 1890 onwards.
update urls set zhen=0 where ID>=1890;



-- zhen-flagged rows with 1000 < ID <= 1890 that also carry one of the
-- site-category flags.
-- The original predicate used "bokee=1" and "tw=1", but the urls table as
-- created in this script has a column named bookee and no tw column, so the
-- statement could not run against that schema. It now uses bookee and, for
-- .tw sites, the same URL pattern this script applies elsewhere.
-- NOTE(review): if the live table really has bokee/tw columns, restore them.
select *
from urls
where (bbs=1 or edu=1 or gov=1 or org=1 or blog=1 or bookee=1 or news=1 or lib=1
       or myurl like '%.tw%')
  and (ID<=1890 and ID>1000)
  and zhen=1;

select * from urls; --where (ID>=1000 and ID<=1833) and (zhen=1 or xun=1)






select * from urls where myurl like '%bokee%';

select * from urls where myurl like '%new%';



-- Set the zhen flag on row 2228.
update urls set zhen=1 where ID=2228;

-- Dump every table for a visual check.
select * from baidu;
select * from searchinterfacekeyword;
select * from keywordinsearchinterface;
select * from keywordinpage;
select * from serviceinpage;

-- Single-row keyword edits, addressed by identity value.
-- NOTE(review): this script inserts only seven rows into
-- keywordinsearchinterface, so id 8 presumably comes from data added
-- elsewhere -- verify the row exists.
update keywordinsearchinterface
set keyword = '书  名'
where id = 8;

update keywordinpage
set keyword = '新书'
where id = 1;

-- Remove the Baidu next-page rows with id 35..39.
delete from baidu
where id between 35 and 39;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -