* bsample.ado
* (code-viewer residue: font size control)
*! version 2.0.3 08mar2004
program define bsample,
    version 8.0

    // Callers running under a pre-version-8 interpreter are handed to the
    // legacy implementation with the command line untouched.
    if _caller() < 8 {
        bsample_7 `0'
        exit
    }

    // Fast path: the command line is empty or carries only weight().
    // The resample then has size _N and needs no strata/cluster handling.
    capture syntax [, weight(varname) ]
    if _rc == 0 {
        // `freq' : tie-breaker first, then frequency weight
        // `draw' : discrete uniform variate on 1.._N (drawn obs id)
        tempvar freq draw
        gen double `draw' = int(uniform()*_N + 1)
        gen double `freq' = uniform()
        sort `draw' `freq'
        quietly {
            // running count within each run of equal draws
            replace `freq' = cond(`draw' == `draw'[_n-1], `freq'[_n-1]+1, 1)
            if "`weight'" == "" {
                // keep the last row of every run and replicate it
                keep if `freq'[_n+1] == 1 | _n == _N
                expand `freq'
            }
            else {
                // only the last row of every run keeps its count
                replace `freq' = 0 if !(`freq'[_n+1] == 1 | _n == _N)
                replace `weight' = `freq'
            }
        }
        exit
    }

    // General path.  When the command line starts with an expression,
    // prepend "=" so that -syntax- can parse it as [=/exp].
    gettoken first : 0, parse("=, ")
    if !inlist(`"`first'"',"",",","if","in") {
        local 0 `"= `0'"'
    }
    syntax [=/exp] [if] [in] [, CLuster(varlist) IDcluster(string) STRata(varlist) Weight(varname) ]

    // Mark the estimation sample; strata variables may be strings.
    tempvar touse
    mark `touse' `if' `in'
    markout `touse' `strata' , strok

    // When every observation is in the sample the marker is dropped so
    // that no sorting on it is needed; otherwise excluded rows are set to
    // missing, which sorts them after the included rows.
    quietly count if `touse'
    local nmax = r(N)
    if `nmax' >= _N {
        local touse
    }
    else {
        quietly replace `touse' = . if !`touse'
    }

    // Sort only when there is something to sort on; the observation
    // number is the final key.
    if `"`touse'`strata'`cluster'"' != "" {
        tempvar obsid
        gen `obsid' = _n
        sort `touse' `strata' `cluster' `obsid'
    }

    // ---- clustered designs -------------------------------------------
    if `"`cluster'"' != "" {
        confirm variable `cluster'
        if "`idcluster'" != "" {
            if `"`weight'"' != "" {
                di as err "options idcluster() and weight() may not be combined"
                exit 198
            }
            // an existing idcluster() variable is replaced
            capture confirm new variable `idcluster'
            if _rc {
                confirm variable `idcluster'
                drop `idcluster'
            }
        }
        if `"`strata'"' != "" {
            quietly StrClustSRSWR `"`touse'"' `"`exp'"' `"`weight'"' `"`strata'"' `"`cluster'"' `"`idcluster'"'
        }
        else {
            quietly ClustSRSWR `nmax' `"`touse'"' `"`exp'"' `"`weight'"' `"`cluster'"' `"`idcluster'"'
        }
        exit
    }

    // ---- unclustered designs -----------------------------------------
    if "`idcluster'" != "" {
        di as err "idcluster() can only be specified with the cluster() option"
        exit 198
    }
    if `"`strata'"' != "" {
        quietly StrSRSWR `"`touse'"' `"`exp'"' `"`weight'"' `"`strata'"'
    }
    else {
        quietly SRSWR `nmax' `"`exp'"' `"`weight'"'
    }
end
// simple random sample with replacement
//
// Positional arguments:
//   nmax    number of observations to sample from; every -in- range below
//           addresses observations 1/nmax
//   exp     optional resample-size expression; it must evaluate to a single
//           positive integer that does not exceed nmax.  Empty means nmax.
//   weight  optional name of an existing variable that receives the
//           frequency weights.  When empty, the data in memory are replaced
//           by the resample instead.
//
// Errors: 198 when exp is not constant or not an integer; 498 when the
// resample size is not positive or exceeds nmax.
program SRSWR
    args nmax exp weight
    // `w' : frequency weight variable
    // `r' : discrete uniform variate: obs id
    tempvar w r

    if `"`exp'"' != "" {
        tempvar nsamp
        gen `nsamp' = `exp' in 1/`nmax'
        // Check over 1/nmax, not 2/nmax: "in 2/1" is an invalid range when
        // nmax is 1 and the resulting error would be misreported as a
        // non-constant expression.
        capture assert `nsamp'==`nsamp'[1] in 1/`nmax'
        if _rc {
            di as err "expression: `exp': is not constant"
            exit 198
        }
        // from here on `nsamp' is the numeric value, not a variable name
        local nsamp = `nsamp'[1]
        capture assert abs(`nsamp'-int(`nsamp')) < 1e-7
        if _rc {
            di as err "expression: `exp': is not an integer"
            exit 198
        }
        capture assert `nsamp' > 0
        if _rc {
            di as err "resample size must be greater than zero"
            exit 498
        }
        if `nsamp' > `nmax' {
            di as err ///
                "resample size must not be greater than number of observations"
            exit 498
        }
    }
    else local nsamp `nmax'

    // Draw an id for every row, order rows by (draw, random tie-breaker),
    // then turn `w' into a running count within each run of equal draws.
    // `r' is stored as double so that ids above 2^24 remain exact, as in
    // the other sampling routines of this file.
    gen double `r' = int(uniform()*`nmax' + 1)
    gen double `w' = uniform()
    sort `r' `w' in 1/`nmax'
    replace `w' = cond(`r' == `r'[_n-1],`w'[_n-1]+1,1) in 1/`nmax'

    if "`weight'" != "" {
        // Only the last row of each run inside the first nsamp rows keeps
        // its count; every other row gets weight zero.
        replace `w' = 0 ///
            if ! (`w'[_n+1] == 1 | _n == `nsamp') | _n > `nsamp'
        replace `weight' = 0
        replace `weight' = `w' in 1/`nmax'
    }
    else {
        // Keep the last row of each run and replicate it by its count.
        keep if `w'[_n+1] == 1 | _n == `nmax'
        expand `w'
        if (`nsamp' == `nmax') exit
        // Smaller resample requested: shuffle and keep the first nsamp rows.
        replace `w' = uniform()
        sort `w'
        keep in 1/`nsamp'
    }
end
// stratified simple random sample with replacement
//
// Positional arguments:
//   touse   name of the sample marker (1 = in sample), or empty when every
//           observation is in the sample
//   exp     optional resample-size expression; must be a positive integer,
//           constant within each stratum and no larger than the stratum
//           size.  Empty means the stratum size (_N within the by-group).
//   weight  optional name of an existing variable that receives the
//           frequency weights; when empty the data are replaced
//   strata  stratum variables
//
// The data must already be sorted by `touse' `strata' because the first
// -by- prefix below is used without a preceding sort.
//
// Errors: 198 when exp is not constant within strata or not an integer;
// 498 when a resample size is not positive or exceeds the stratum size.
program StrSRSWR
    args touse exp weight strata
    // `w' : frequency weight variable
    // `r' : discrete uniform variate: obs id
    tempvar w r

    local by "by `touse' `strata':"
    if `"`touse'"' != "" {
        local iftouse "if `touse'==1"
    }

    if `"`exp'"' != "" {
        // here `nsamp' is a variable holding the per-stratum sample size
        tempvar nsamp
        `by' gen `nsamp' = `exp' `iftouse'
        capture `by' assert `nsamp'==`nsamp'[1] `iftouse'
        if _rc {
            di as err ///
                "expression: `exp': is not constant within strata"
            exit 198
        }
        capture assert abs(`nsamp'-int(`nsamp')) < 1e-7 `iftouse'
        if _rc {
            di as err "expression: `exp': is not an integer"
            exit 198
        }
        capture assert `nsamp' > 0 `iftouse'
        if _rc {
            di as err "resample size(s) must be greater than zero"
            exit 498
        }
        capture `by' assert `nsamp' <= _N `iftouse'
        if _rc {
            di as err ///
                "resample sizes must not be greater than number of observations with strata"
            exit 498
        }
    }
    // without exp, `nsamp' expands to _N, i.e. the by-group size
    else local nsamp _N

    // Per stratum: draw ids, order by (draw, random tie-breaker) and turn
    // `w' into a running count within each run of equal draws.  `r' is
    // stored as double so that large ids remain exact.
    `by' gen double `r' = int(uniform()*_N + 1)
    gen double `w' = uniform()
    sort `touse' `strata' `r' `w'
    `by' replace `w' = cond(`r' == `r'[_n-1],`w'[_n-1]+1,1)

    if "`weight'" != "" {
        `by' replace `w' = 0 ///
            if !(`w'[_n+1] == 1 | _n == `nsamp') | (_n > `nsamp')
        replace `weight' = `w'
        if `"`iftouse'"' != "" {
            // rows outside the sample never carry weight
            replace `weight' = 0 if `touse' != 1
        }
    }
    else {
        `by' keep if `w'[_n+1] == 1 | _n == _N
        expand `w'
        // A non-zero return code (assertion false, or data no longer
        // sorted after -expand-) sends control to the trimming code below.
        capture by `touse' `strata': assert `nsamp' == _N `iftouse'
        if (! _rc) {
            // No trimming needed, but rows outside the sample must still
            // be dropped before leaving, exactly as on the path below.
            if `"`iftouse'"' != "" {
                keep `iftouse'
            }
            exit
        }
        // Shuffle within strata and keep the first nsamp rows of each.
        replace `w' = uniform()
        sort `touse' `strata' `w'
        `by' keep if _n<=`nsamp'
        if `"`iftouse'"' != "" {
            keep `iftouse'
        }
    }
end
// clustered simple random sample with replacement
//
// Positional arguments:
//   nmax       number of observations in the sample; they occupy rows
//              1/nmax (every -in- range below relies on that)
//   touse      name of the sample marker, or empty
//   exp        optional expression for the number of clusters to draw;
//              must be a single positive integer not above the number of
//              clusters.  Empty means the number of clusters.
//   weight     optional existing variable that receives frequency
//              weights; when empty the data are replaced
//   cluster    cluster variables
//   idcluster  optional name for a new variable identifying the resampled
//              clusters (created by NewClustID)
//
// The data must already be sorted by `touse' `cluster'.
//
// Errors: 460 when only one cluster is found; 198 when exp is not constant
// or not an integer; 498 when the resample size is not positive or exceeds
// the number of clusters.
program ClustSRSWR
    args nmax touse exp weight cluster idcluster
    // `w'      : frequency weight variable
    // `r'      : discrete uniform variate: cluster id
    // `obsid'  : observation id
    // `clid'   : cluster id
    // `nclust' : number of clusters (the name is reused below as a plain
    //            local holding the count)
    tempvar w r obsid clid nclust

    // Number the clusters 1..nclust within rows 1/nmax.  Rows beyond nmax
    // keep the raw 0/1 first-row flag.
    by `touse' `cluster': gen byte `clid' = (_n == 1)
    replace `clid' = sum(`clid') in 1/`nmax'
    local nclust = `clid'[`nmax']
    if `nclust' == 1 {
        di as err "singleton cluster detected"
        exit 460
    }

    if `"`exp'"' != "" {
        tempvar nsamp
        gen `nsamp' = `exp' in 1/`nmax'
        capture assert `nsamp'==`nsamp'[1] in 1/`nmax'
        if _rc {
            di as err "expression: `exp': is not constant"
            exit 198
        }
        // from here on `nsamp' is the numeric value, not a variable name
        local nsamp = `nsamp'[1]
        capture assert abs(`nsamp'-int(`nsamp')) < 1e-7
        if _rc {
            di as err "expression: `exp': is not an integer"
            exit 198
        }
        capture assert `nsamp' > 0
        if _rc {
            di as err "resample size(s) must be greater than zero"
            exit 498
        }
        if `nsamp' > `nclust' {
            di as err ///
                "resample size must not be greater than number of clusters"
            exit 498
        }
    }
    else local nsamp `nclust'

    // Generate bootstrap samples of `clid' in 1/`nclust'
    // Rows 1/nclust serve as scratch space: each holds one draw.  After
    // the sort, `w' becomes the running count within runs of equal draws.
    gen `obsid' = _n
    gen double `r' = int(uniform()*`nclust' + 1) in 1/`nclust'
    gen double `w' = uniform() in 1/`nclust'
    sort `r' `w' `obsid' in 1/`nmax'
    replace `w' = cond(`r' == `r'[_n-1],`w'[_n-1]+1,1) in 1/`nclust'

    if "`weight'" != "" {
        replace `w' = 0 ///
            if ! (`w'[_n+1] == 1 | _n == `nsamp') | _n > `nsamp'
        // move the counts so that row k holds the count of cluster k,
        // then look the count up for every row through its cluster id
        replace `r' = `w'[`obsid'] in 1/`nclust'
        replace `w' = `r'[`clid']
        replace `weight' = 0
        replace `weight' = `w' in 1/`nmax'
    }
    else {
        local ncl1 = `nclust' - 1
        replace `w' = 0 if `w'[_n+1] != 1 in 1/`ncl1'
        replace `r' = `w'[`obsid'] in 1/`nclust'
        replace `w' = `r'[`clid']
        // Rows beyond nmax are outside the sample.  Their `clid' is 0 or
        // 1, so `w' is missing (which counts as true) or the count of
        // cluster 1; without the row restriction they would be kept.
        keep if `w' & _n <= `nmax'
        expand `w'
        if ( (`nsamp' != `nclust') | ("`idcluster'" != "") ) {
            NewClustID ///
                `"`idcluster'"' ///
                `""' ///
                `"`cluster'"' ///
                `"`nclust'"' ///
                `"`clid'"' ///
                `"`obsid'"' ///
                `"`r'"' ///
                `"`w'"' ///
                `"`nsamp'"'
        }
    }
end
// stratified and clustered simple random sample with replacement
//
// Positional arguments:
//   touse      name of the sample marker (1 = in sample), or empty
//   exp        optional expression for the number of clusters to draw per
//              stratum; must be a positive integer, constant within
//              strata and not above the stratum's number of clusters.
//              Empty means the stratum's number of clusters.
//   weight     optional existing variable that receives frequency
//              weights; when empty the data are replaced
//   strata     stratum variables
//   cluster    cluster variables
//   idcluster  optional name for a new variable identifying the resampled
//              clusters (created by NewClustID)
//
// The data must already be sorted by `touse' `strata' `cluster'.
//
// Errors: 460 when a stratum contains a single cluster; 198 when exp is
// not constant within strata or not an integer; 498 when a resample size
// is not positive or exceeds the number of clusters.
program StrClustSRSWR
    args touse exp weight strata cluster idcluster
    // `w'      : frequency weight variable
    // `r'      : discrete uniform variate: cluster id
    // `obsid'  : observation id
    // `clid'   : cluster id
    // `nclust' : number of clusters
    tempvar w r obsid clid nclust

    if `"`touse'"' != "" {
        local iftouse "if `touse'==1"
        local andtouse "& `touse'==1"
    }
    local by "by `touse' `strata' :"

    // Number the clusters 1..nclust within each stratum and store the
    // per-stratum cluster count in the variable `nclust'.
    by `touse' `strata' `cluster' : gen byte `clid' = (_n == 1) `iftouse'
    `by' replace `clid' = sum(`clid') `iftouse'
    `by' gen `nclust' = `clid'[_N] `iftouse'
    capture assert `nclust' != 1 `iftouse'
    if _rc {
        di as err "singleton cluster detected"
        exit 460
    }

    if `"`exp'"' != "" {
        // here `nsamp' is a variable holding the per-stratum sample size
        tempvar nsamp
        `by' gen `nsamp' = `exp' `iftouse'
        capture `by' assert `nsamp'==`nsamp'[1] `iftouse'
        if _rc {
            di as err ///
                "expression: `exp': is not constant within strata"
            exit 198
        }
        capture assert abs(`nsamp'-int(`nsamp')) < 1e-7 `iftouse'
        if _rc {
            di as err "expression: `exp': is not an integer"
            exit 198
        }
        capture assert `nsamp' > 0
        if _rc {
            di as err "resample size(s) must be greater than zero"
            exit 498
        }
        capture assert `nsamp' <= `nclust' `iftouse'
        if _rc {
            // The continuation marker is required: without it the message
            // on the next line is executed as a command and the user gets
            // an unrelated error instead of this one.
            di as err ///
                "resample size must not be greater than number of clusters"
            exit 498
        }
    }
    // without exp, `nsamp' is the name of the cluster-count variable
    else local nsamp `nclust'

    // Generate bootstrap samples of `clid' in 1/`nclust' by strata
    // The first nclust rows of each stratum serve as scratch space: each
    // holds one draw.  After the sort, `w' becomes the running count
    // within runs of equal draws.
    `by' gen `obsid' = _n
    `by' gen double `r' = int(uniform()*`nclust' + 1) ///
        if _n <= `nclust' `andtouse'
    `by' gen double `w' = uniform() ///
        if _n <= `nclust' `andtouse'
    sort `touse' `strata' `r' `w' `obsid'
    `by' replace `w' = cond(`r' == `r'[_n-1],`w'[_n-1]+1,1) ///
        if _n <= `nclust'

    if "`weight'" != "" {
        `by' replace `w' = 0 ///
            if (`w'[_n+1] != 1 & _n < `nsamp') | _n > `nsamp'
        // move the counts so that row k of a stratum holds the count of
        // its cluster k, then look the count up through the cluster id
        `by' replace `r' = `w'[`obsid']
        `by' replace `w' = `r'[`clid']
        replace `weight' = `w'
        if `"`iftouse'"' != "" {
            // rows outside the sample never carry weight
            replace `weight' = 0 if `touse' != 1
        }
    }
    else {
        `by' replace `w' = 0 ///
            if ! (`w'[_n+1] == 1 | _n == `nclust') | _n > `nclust'
        `by' replace `r' = `w'[`obsid'] if _n <= `nclust'
        `by' replace `w' = `r'[`clid']
        if `"`iftouse'"' != "" {
            keep `iftouse' & `w'
        }
        else keep if `w'
        expand `w'
        // Renumber/trim clusters when fewer clusters than available were
        // requested somewhere, or when the caller wants an id variable.
        capture assert `nsamp' == `nclust'
        if ( _rc | ("`idcluster'" != "") ) {
            NewClustID ///
                `"`idcluster'"' ///
                `"`strata'"' ///
                `"`cluster'"' ///
                `"`nclust'"' ///
                `"`clid'"' ///
                `"`obsid'"' ///
                `"`r'"' ///
                `"`w'"' ///
                `"`nsamp'"'
        }
    }
end
// NewClustID: renumber the clusters of an already expanded cluster resample
// as 1, 2, 3, ... (within strata when strata are given), keep only the first
// `nsamp' of them, and optionally publish the numbering as a new variable.
//
// Positional arguments:
//   idcluster  name for the resulting cluster id variable; empty = none
//   strata     stratum variables; may be empty
//   cluster    cluster variables (received but not referenced in this body)
//   nclust     number of clusters; the callers in this file pass either a
//              number or the name of a variable
//   clid       variable holding the original cluster number
//   obsid      variable holding the observation id
//   r, w       scratch variables; their contents are overwritten here
//   nsamp      number of clusters to keep; like nclust, either a number or
//              a variable name depending on the caller
program NewClustID
args idcluster ///
strata ///
cluster ///
nclust ///
clid ///
obsid ///
r ///
w ///
nsamp ///
// blank
// `by' stays empty when there are no strata, so the running sum further
// down then runs over the whole dataset.
if `"`strata'"' != "" {
local by "by `strata':"
}
// Create new cluster id variable
// Within each (strata, clid, obsid) group the k-th row gets
// (clid-1)*nclust + k-1, so `r' separates the k-th copy of a cluster from
// its other copies.
// NOTE(review): this presumably relies on the callers' -expand- having
// produced the duplicate obsid rows - confirm against the callers.
sort `strata' `clid' `obsid'
by `strata' `clid' `obsid': replace `r' = (`clid'-1)*`nclust'+_n-1
sort `strata' `r' `obsid'
// randomly order clusters
// Only needed when fewer clusters are kept than exist: every `r' group
// gets one uniform draw (first row draws, later rows copy it) and the
// data are sorted on it.
capture assert `nsamp' == `nclust'
if _rc {
by `strata' `r': replace `w' = cond(_n == 1,uniform(),`w'[_n-1])
// sort by `r' in case of ties in `w'
sort `strata' `w' `r' `obsid'
}
// otherwise blank out the macro so that `w' drops out of the by-list below
else local w
// Make cluster id be 1, 2, 3, ....
// Flag the first row of each group, then take the running sum of the flags.
by `strata' `w' `r': replace `clid' = (_n == 1)
`by' replace `clid' = sum(`clid')
// keep only the first `nsamp' renumbered clusters
keep if `clid' <= `nsamp'
// publish the numbering under the requested name
if "`idcluster'" != "" {
label variable `clid' "Bootstrap sample cluster id"
rename `clid' `idcluster'
}
end
exit
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -