⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 traindatasql语句.txt

📁 基于决策树和贝叶斯的预测分析器
💻 TXT
字号:
        /* 1、 ageType分6类:<=20为year0_20,21-30为year21_30,31-40为year31_40,41-50为year41_50,51-60为year51_60,>=61为yearover60
        
           2、  原有education_numType: 1-16  ,分成6级:   edu1_3,edu4_6,edu7_9,edu10_12,edu13_14,edu15_16
        
           3、 occupationType共14类:Tech_support, Craft_repair, Other_service(?), Sales, Exec_managerial, Prof_specialty,
                   Handlers_cleaners, Machine_op_inspct, Adm_clerical, Farming_fishing, Transport_moving, 
                   Priv_house_serv, Protective_serv, Armed_Forces
  
           4 、原有native-countryType:United-States, Cambodia, England, Puerto-Rico, Canada, Germany, 
          Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, 
          Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, 
          Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, 
          Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands}
               native_country分成5类developNO1,developNO2,developNO3,developNO4,developNO5后:
         developNO1:(<0.1)       Outlying-US(Guam-USVI-etc),Vietnam,Mexico,Dominican-Republic,Laos,Haiti,Hungary,
				Guatemala,Nicaragua,Scotland,El-Salvador,Trinadad&Tobago,Holand-Netherlands	
											
         developNO2:(>=0.1 <0.2)  Puerto-Rico,South,China,Cuba,Poland,Jamaica,Portugal,Ireland,Ecuador,Peru,?

         developNO3:(>=0.2 <0.3)  Honduras,France,Columbia,United-States,England,Germany,Greece,Philippines,Thailand,                                  Yugoslavia
         developNO4:(>=0.3 <0.4)  India.Japan.
         developNO5:(>=0.4 )      Cambodia,Canada,Iran,Italy,Taiwan,Hong      
        
平均:0.2375  
计算方法:
SELECT COUNT(*) 
FROM traindata1
WHERE native_country = ' ?'

SELECT COUNT(*)
FROM traindata1
WHERE native_country = ' ?' AND makeover = ' >50K'
      
具体数值:
Outlying-US(Guam-USVI-etc):0 Vietnam:0.08 Mexico:0.064 Dominican-Republic:0.083 Laos:0 Haiti:0.067 Hungary:0
Guatemala:0.048 Nicaragua:0 Scotland:0 El-Salvador:0.097 Trinadad&Tobago:0 Holand-Netherlands:0

Puerto-Rico:0.1395 South:0.1  China:0.1613 Cuba:0.1516  Poland:0.1739 Jamaica:0.1875 Portugal:0.1667
Ireland:0.125  Ecuador:0.125  Peru:0.1111  ?:0.1111


Honduras:0.2 France:0.2 Columbia:0.2 United-States:0.2418 England:0.2812 Germany:0.2653  Greece:0.2222 Philippines:0.2632 Thailand:0.25 Yugoslavia:0.25

India:0.3 Japan:0.3 
Cambodia:0.4285 Canada:0.404 Iran:0.4783  Italy:0.4545 Taiwan:0.4211 Hong:0.5
         5、sexType分为Male、Female两类
*/




      

CREATE DATABASE adult_data
ON
( 
	NAME=adult_data,
    FILENAME='adult_data.mdf')


CREATE TABLE traindata1
(

	age      INT,
	workclass VARCHAR(30),
	fnlwgt   INT,
	education VARCHAR(20),
	education_num INT,
	marital_status VARCHAR(30),
	occupation  VARCHAR(30),
	relationship VARCHAR(30),
	race VARCHAR(20),
	sex VARCHAR(10),
	capital_gain INT,
	capital_loss INT,
	hours_per_week INT,
	native_country  VARCHAR(30),
	makeover VARCHAR (10)
)


BULK INSERT traindata1
FROM 'adult.data.txt'
WITH
(
	FIELDTERMINATOR = ',',
	ROWTERMINATOR = '\n'
)


CREATE TABLE traindata2
(

	age      INT,
	workclass VARCHAR(30),
	fnlwgt   INT,
	education VARCHAR(20),
	education_num INT,
	marital_status VARCHAR(30),
	occupation  VARCHAR(30),
	relationship VARCHAR(30),
	race VARCHAR(20),
	sex VARCHAR(10),
	capital_gain INT,
	capital_loss INT,
	hours_per_week INT,
	native_country  VARCHAR(30),
	makeover50k VARCHAR (10)
)


BULK INSERT traindata2
FROM 'adult.data.txt'
WITH
(
	FIELDTERMINATOR = ',',
	ROWTERMINATOR = '\n'
)

ALTER TABLE traindata2 DROP COLUMN workclass
ALTER TABLE traindata2 DROP COLUMN fnlwgt
ALTER TABLE traindata2 DROP COLUMN marital_status
ALTER TABLE traindata2 DROP COLUMN relationship
ALTER TABLE traindata2 DROP COLUMN race
ALTER TABLE traindata2 DROP COLUMN capital_gain
ALTER TABLE traindata2 DROP COLUMN capital_loss
ALTER TABLE traindata2 DROP COLUMN hours_per_week

//ageType分6类:<=20为year0_20,21-30为year21_30,31-40为year31_40,41-50为year41_50,51-60为year51_60,>=61为yearover60

UPDATE traindata2
SET age=0
WHERE age<=20

UPDATE traindata2
SET age=1
WHERE age>20 AND age<=30

UPDATE traindata2
SET age=2
WHERE age>30 AND age<=40

UPDATE traindata2
SET age=3
WHERE age>40 AND age<=50

UPDATE traindata2
SET age=4
WHERE age>50 AND age<=60

UPDATE traindata2
SET age=5
WHERE age>60 

ALTER TABLE traindata2 ALTER COLUMN age VARCHAR(15)

UPDATE traindata2
SET age = 'year0_20'
WHERE age = '0'

UPDATE traindata2
SET age= 'year21_30'
WHERE age = '1'

UPDATE traindata2
SET age = 'year31_40'
WHERE age = '2'

UPDATE traindata2
SET age = 'year31_40'
WHERE age = '3'

UPDATE traindata2
SET age = 'year41_50'
WHERE age = '4'

UPDATE traindata2
SET age = 'yearover60'
WHERE age = '5'

//原有education_numType: 1-16  ,分成6级:   edu1_3,edu4_6,edu7_9,edu10_12,edu13_14,edu15_16

UPDATE traindata2
SET education = 'edu1_3'
WHERE education_num >= 0 AND  education_num <= 3

UPDATE traindata2
SET education = 'edu4_6'
WHERE education_num >= 4 AND  education_num <= 6

UPDATE traindata2
SET education = 'edu7_9'
WHERE education_num >= 7 AND  education_num <= 9

UPDATE traindata2
SET education = 'edu10_12'
WHERE education_num >= 10 AND  education_num <= 12

UPDATE traindata2
SET education = 'edu13_14'
WHERE education_num >= 13 AND  education_num <= 14

UPDATE traindata2
SET education = 'edu15_16'
WHERE education_num >= 15 AND  education_num <= 16

ALTER TABLE traindata2 DROP COLUMN education_num


//occupationType共14类:Tech-support, Craft-repair, Other-service, Sales, Exec-managerial, Prof-specialty,
//                   Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving, 
//                 Handlers-cleaners, Machine-op-inspct, Adm-clerical, Farming-fishing, Transport-moving
//                   Priv-house-serv, Protective-serv, Armed-Forces
//occupationType共14类(将原"-"改为"_"便于编程处理,并去掉前面的空格):Tech_support, Craft_repair, Other_service, Sales, //Exec_managerial, Prof_specialty, Handlers_cleaners, Machine_op_inspct, Adm_clerical, Farming_fishing, Transport_moving, 
//Priv_house_serv, Protective_serv, Armed_Forces

UPDATE traindata2
SET occupation = 'Tech_support'
WHERE occupation =' Tech-support' 

UPDATE traindata2
SET occupation = 'Craft_repair'
WHERE occupation =' Craft-repair' 

UPDATE traindata2
SET occupation = 'Other_service'
WHERE occupation =' Other-service' 

UPDATE traindata2
SET occupation = 'Exec_managerial'
WHERE occupation =' Exec-managerial' 

UPDATE traindata2
SET occupation = 'Prof_specialty'
WHERE occupation =' Prof-specialty' 

UPDATE traindata2
SET occupation = 'Handlers_cleaners'
WHERE occupation =' Handlers-cleaners' 

UPDATE traindata2
SET occupation = 'Machine_op_inspct'
WHERE occupation =' Machine-op-inspct' 

UPDATE traindata2
SET occupation = 'Adm_clerical'
WHERE occupation =' Adm-clerical' 

UPDATE traindata2
SET occupation = 'Farming_fishing'
WHERE occupation =' Farming-fishing' 
UPDATE traindata2
SET occupation = 'Transport_moving'
WHERE occupation =' Transport-moving' 

UPDATE traindata2
SET occupation = 'Priv_house_serv'
WHERE occupation =' Priv-house-serv' 

UPDATE traindata2
SET occupation = 'Protective_serv'
WHERE occupation =' Protective-serv' 

UPDATE traindata2
SET occupation = 'Armed_Forces'
WHERE occupation =' Armed-Forces' 

UPDATE traindata2
SET occupation = 'Other_service'
WHERE occupation =' ?'

/*原有native-countryType:United-States, Cambodia, England, Puerto-Rico, Canada, Germany, 
          Outlying-US(Guam-USVI-etc), India, Japan, Greece, South, China, Cuba, Iran, Honduras, 
          Philippines, Italy, Poland, Jamaica, Vietnam, Mexico, Portugal, Ireland, France, 
          Dominican-Republic, Laos, Ecuador, Taiwan, Haiti, Columbia, Hungary, Guatemala, Nicaragua, 
          Scotland, Thailand, Yugoslavia, El-Salvador, Trinadad&Tobago, Peru, Hong, Holand-Netherlands}
               native_country分成5类developNO1,developNO2,developNO3,developNO4,developNO5后:
         developNO1:(<0.1)       Outlying-US(Guam-USVI-etc),Vietnam,Mexico,Dominican-Republic,Laos,Haiti,Hungary,
				Guatemala,Nicaragua,Scotland,El-Salvador,Trinadad&Tobago,Holand-Netherlands	
											
         developNO2:(>=0.1 <0.2)  Puerto-Rico,South,China,Cuba,Poland,Jamaica,Portugal,Ireland,Ecuador,Peru,?

         developNO3:(>=0.2 <0.3)  Honduras,France,Columbia,United-States,England,Germany,Greece,Philippines,Thailand,                                  Yugoslavia
         developNO4:(>=0.3 <0.4)  India.Japan.
         developNO5:(>=0.4 )      Cambodia,Canada,Iran,Italy,Taiwan,Hong 

*/
UPDATE traindata2
SET native_country = 'developNO1'
WHERE native_country = ' Outlying-US(Guam-USVI-etc)' OR native_country = ' Vietnam' 
   OR native_country = ' Mexico' OR native_country = ' Dominican-Republic' 
   OR native_country = ' Laos' OR native_country = ' Haiti' 
   OR native_country = ' Hungary' OR native_country = ' Guatemala' 
   OR native_country = ' Nicaragua' OR native_country = ' Scotland' 
   OR native_country = ' El-Salvador' OR native_country = ' Trinadad&Tobago' 
   OR native_country = ' Holand-Netherlands'

UPDATE traindata2
SET native_country = 'developNO2'
WHERE native_country = ' Puerto-Rico' OR native_country = ' South' 
   OR native_country = ' China' OR native_country = ' Cuba' 
   OR native_country = ' Poland' OR native_country = ' Jamaica' 
   OR native_country = ' Portugal' OR native_country = ' Ireland' 
   OR native_country = ' Ecuador' OR native_country = ' Peru' 
   OR native_country = ' ?' 

UPDATE traindata2
SET native_country = 'developNO3'
WHERE native_country = ' Honduras' OR native_country = ' France' 
   OR native_country = ' Columbia' OR native_country = ' United-States' 
   OR native_country = ' England' OR native_country = ' Germany' 
   OR native_country = ' Greece' OR native_country = ' Philippines' 
   OR native_country = ' Thailand' OR native_country = ' Yugoslavia' 
        UPDATE traindata2
SET native_country = 'developNO4'
WHERE native_country = ' India' OR native_country = ' Japan'

UPDATE traindata2
SET native_country = 'developNO5'
WHERE native_country = ' Cambodia' OR native_country = ' Canada' 
   OR native_country = ' Iran' OR native_country = ' Italy' 
   OR native_country = ' Taiwan' OR native_country = ' Hong'                           




UPDATE traindata2
SET makeover50k = 'yes'
WHERE makeover50k = ' >50K'

UPDATE traindata2
SET makeover50k = 'no'
WHERE makeover50k = ' <=50K'



SELECT *
FROM traindata2

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -