📄 stsplit.ado
字号:
*! version 2.2.4 15sep2004
program define stsplit
    version 7, missing

    * Callers running under a version before 6 are routed to the old
    * implementation, which receives the command line untouched.
    if _caller() < 6 {
        ztspli_5 `0'
        exit
    }

    * The data must be stset, and an id() variable must have been declared.
    st_is 2 full
    if `"`_dta[st_id]'"' == "" {
        di as err "stsplit requires that you have previously stset an id() variable"
        exit 198
    }

    * Survey data are refused.
    capture svyset
    if "`r(_svy)'" != "" {
        di as err "stsplit not allowed with survey data"
        exit 498
    }

    * Only at() and every() are examined here; every other option is
    * collected in -options- and handed on to the worker program.
    syntax [newvarname(numeric)] [=exp] [if], [AT(str) EVery(str) *]

    * Exactly one of at() and every() has to be given.
    local gaveAt    = (`"`at'"' != "")
    local gaveEvery = (`"`every'"' != "")
    if `gaveAt' + `gaveEvery' == 0 {
        di as err "options at() or every() should be specified"
        exit 198
    }
    if `gaveAt' + `gaveEvery' == 2 {
        di as err "may not specify both at() and every() options"
        exit 198
    }

    * Number of observations before expansion, reported by Created at the end.
    local n0 = _N

    * Dispatch.  every() must be tested first: when it is given, at() is
    * empty and an empty string would match the zero-length prefix of
    * "failures" in the test below.
    if `gaveEvery' {
        confirm number `every'
        Every `typlist' `varlist' `exp' `if' , every(`every') `options'
    }
    else {
        * at() holding any leading abbreviation of "failures" selects
        * splitting at failure times; anything else is taken as a numlist
        if `"`at'"' == substr("failures",1,length(`"`at'"')) {
            AtFailures `typlist' `varlist' `exp' `if' , `options'
        }
        else {
            AtNumList `typlist' `varlist' `exp' `if' , at(`at') `options'
        }
    }

    Created `n0'
end
* ============================================================================
* AtFailures -- performs episode splitting at the failure times
* ============================================================================
program define AtFailures
    * Splits the selected st episodes at the distinct failure times found
    * in the data, optionally determining failure times per stratum.
    *
    * Arguments, as parsed by -syntax- below:
    *   if            restricts the records that are considered
    *   strata()      1 to 5 variables; failure times are taken per stratum
    *   codesplit()   passed through unchanged to ModifyUserVars
    *   riskset()     new variable (optionally with a type) that receives
    *                 an identifier of the risk sets
    *   list          display the failure times
    *   fast, nopreserve
    *                 either one suppresses the -preserve- issued before
    *                 the data are expanded
    *
    * Side effects visible in this program: _t0, _t and _d are rewritten
    * for split episodes, records with missing strata values are set to
    * _st==0, and the riskset() variable is created when requested.
    syntax [if/], [ STrata(varlist min=1 max=5) COdesplit(passthru) /*
        */ Riskset(str) List Fast noPREserve ]

    if `"`riskset'"' != "" {
        * re-parse riskset() as a new variable name with optional type;
        * this overwrites local 0 as well as varlist and typlist
        local 0 `riskset'
        syntax newvarname
        local rname `varlist'
        local rtype `typlist'
    }

    * sample selection (beware: touse is -1/0 coded)
    * ----------------------------------------------
    * touse is negated so that sorting on it places the selected records
    * (coded -1) in the first observations; the code below addresses them
    * with -in 1/n-.

    quietly {
        tempvar touse
        gen byte `touse' = _st
        if `"`if'"' != "" {
            * syntax [if/] stored the expression without the leading if
            replace `touse' = 0 if !(`if')
        }
        if "`strata'" == "" {
            replace `touse' = -`touse'
        }
        else {
            * Remember the selection as it was before markout, so that
            * only records dropped because of missing strata values are
            * reported and set to _st==0.  Records excluded merely by the
            * if expression keep their _st, exactly as they do when no
            * strata() is specified.
            tempvar prior
            gen byte `prior' = `touse'
            markout `touse' `strata' , strok
            count if `touse' == 0 & `prior' == 1
            if r(N) > 0 {
                noi di as txt r(N) /*
                    */ " records marked _st==0 due to missings in strata (`strata')"
                replace _st = 0 if `touse' == 0 & `prior' == 1
            }
            drop `prior'
            replace `touse' = -`touse'

            * strata() are identified by a single variable
            * referred to by the tempvar -Strata-, coded 1 2 ...
            tempvar Strata
            bys `touse' `strata' : gen long `Strata' = _n==1 if `touse'
            replace `Strata' = sum(`Strata')
            compress `Strata'
        }

        * identify the distinct failure times T()
        * ----------------------------------------
        * leave data so that the failure times T[1]<T[2]<..T[nevent] are sorted
        * and in the first nevent observations
        *
        * if strata() are specified, the sorted failure times are in subsequent
        * (sets of) observations per stratum

        * Within each (stratum, _t) group the records are ordered by _d, so
        * the last record is flagged when the group contains a failure.
        * The flag is -1 rather than 1 so that flagged records sort before
        * the unflagged ones, whose flag is missing.
        tempvar event
        bys `touse' `Strata' _t (_d) : gen `event' = cond(_n==_N & _d==1, -1, .) if `touse'
        sort `touse' `event' `Strata' _t

        local nobs = _N
        count if `event' == -1
        local nevent = r(N)
        if `nevent' == 0 {
            noi di as txt "(there are no failures)"
            exit 0
        }

        * count distinct failure times within strata
        * nEventStrata = #failures per stratum (may be 0)
        if "`strata'" != "" {
            * mfail is obtained via tempname but used as a variable name
            tempname nEventStrata mfail
            gen byte `mfail' = `event' == -1
            * NOTE(review): presumably tabulates the strata over the records
            * with mfail==1 and stores the counts in the matcell() matrix;
            * subpop() is not a documented option of tabulate -- confirm
            tab `Strata', matcell(`nEventStrata') subpop(`mfail')
            local nStrata = rowsof(`nEventStrata')
            drop `mfail'

            forv is = 1/`nStrata' {
                if `nEventStrata'[`is',1] == 0 {
                    local zerofound 1
                }
            }
            if "`zerofound'" != "" {
                noi di as txt "note: there are strata without failures"
            }
        }

        * use obs 1/nevent of tempvar T as an array with failure times
        * this facilitates efficient code below
        tempvar T
        gen double `T' = _t in 1/`nevent'
        compress `event' `T'

    } /* quietly */

    * list the failure times
    * ----------------------

    di as txt "(`nevent' failure time" cond(`nevent'>1, "s)", ")")
    if "`list'" != "" {
        * each failure time is shown in a field of width 9
        local lsize : set linesize
        local ncol = int((`lsize'-1)/9)
        if "`strata'" == "" {
            forv ev = 1/`nevent' {
                di %9.0g `T'[`ev'] _c
                if mod(`ev',`ncol') == 0 { di }
            }
            if mod(`ev'-1,`ncol') != 1 { di }
            di
        }
        else {
            * ev1..ev2 is the range of T that belongs to the current stratum
            local ev2 0
            forv is = 1/`nStrata' {
                di as txt _n "stratum `is'"
                if `nEventStrata'[`is',1] > 0 {
                    local ev1 = `ev2' + 1
                    local ev2 = `ev1' + `nEventStrata'[`is',1] - 1
                    local ev `ev1'
                    while `ev' <= `ev2' {
                        di as res %9.0g `T'[`ev'] _c
                        local ev = `ev' + 1
                        if mod(`ev'-`ev1',`ncol') == 0 { di }
                    }
                    if mod(`ev'-`ev1'+1,`ncol') != 1 { di }
                }
                else di as res _col(9) "no failures"
            }
        }
    }

    * ---------------------------------------------------------------------
    * failure times in the interior of an episode are indexed ifirst..ilast
    * within the list T of failure times (strata are accounted for)
    *
    * ifirst/ilast are missing if no internal failure time
    *
    * algorithm: bisection, which is much faster than a linear search to
    * determine the smallest T > _t0 and the largest T < _t.
    * ---------------------------------------------------------------------

    quietly {
        tempvar ifirst ilast ilow ihigh il ih im busy

        * ilow .. ihigh is range of T-values to be scanned. While this range is
        * stratum specific, the rest of the code need no longer reflect strata.

        if "`strata'" == "" {
            gen long `ilow' = 1
            gen long `ihigh' = `nevent'
        }
        else {
            * strata without failures keep ilow and ihigh missing
            gen long `ilow' = .
            gen long `ihigh' = .
            local Ihigh 0
            forv is = 1/`nStrata' {
                if `nEventStrata'[`is',1] > 0 {
                    local Ilow = `Ihigh' + 1
                    local Ihigh = `Ilow' + `nEventStrata'[`is',1] - 1
                    replace `ilow' = `Ilow' if `Strata'==`is'
                    replace `ihigh' = `Ihigh' if `Strata'==`is'
                }
            }
        }

        * binary search : ifirst = index smallest T > _t0

        * The bisection runs on all records at once: busy marks the records
        * whose bracket il..ih is still wider than one step, and the loop
        * ends when no record is busy any more.
        gen long `il' = `ilow'
        gen long `ih' = `ihigh'
        gen long `im' = .
        gen byte `busy' = (`T'[`ih'] > _t0) & (`ih' > `il') if `touse'
        count if `busy' == 1
        while r(N) > 0 {
            replace `im' = int((`il'+`ih')/2) if `busy'==1
            replace `ih' = `im' if `T'[`im'] > _t0 & `busy'==1
            replace `il' = `im' if `T'[`im'] <= _t0 & `busy'==1
            replace `busy' = `ih' - `il' > 1 if `busy'==1
            count if `busy' == 1
        }
        * the lower end of the bracket was never tested itself; step down
        * by one if it also exceeds _t0
        replace `ih' = `ih'-1 if `ih'>`ilow' & `T'[`ih'-1]>_t0 & `touse'
        gen long `ifirst' = cond(`T'[`ih'] > _t0, `ih', .) if `touse'

        /*
        Assert 1 _t0 < `T'[`ifirst'] if `ifirst'<. & `touse'
        Assert 2 _t0 >= `T'[`ifirst'-1] if `ifirst'<. & `ifirst'>`ilow' & `touse'
        Assert 3 _t0 <= `T'[`ihigh'] if `ifirst'>=. & `touse'
        */

        * binary search : ilast = index largest T < _t

        drop `il' `ih' `im' `busy'
        gen long `il' = `ilow'
        gen long `ih' = `ihigh'
        gen long `im' = .
        gen byte `busy' = (`T'[`il'] < _t) & (`ih' > `il') if `touse'
        count if `busy' == 1
        while r(N) > 0 {
            replace `im' = int((`il'+`ih')/2) if `busy'==1
            replace `il' = `im' if `T'[`im'] < _t & `busy'==1
            replace `ih' = `im' if `T'[`im'] >= _t & `busy'==1
            replace `busy' = `ih' - `il' > 1 if `busy'==1
            count if `busy' == 1
        }
        * the upper end of the bracket was never tested itself; step up
        * by one if it is also below _t
        replace `il' = `il'+1 if `il'<`ihigh' & `T'[`il'+1]<_t & `touse'
        gen long `ilast' = cond(`T'[`il'] < _t, `il', .) if `touse'

        /*
        Assert 4 `T'[`ilast'] < _t if `ilast'<. & `touse'
        Assert 5 `T'[`ilast'+1] >= _t if `ilast'<`ihigh' & `touse'
        Assert 6 `T'[`ilow'] >= _t if `ilast'>=. & `touse'
        */

        * set ifirst/ilast to missing if not both in the interior
        replace `ilast' = . if `ifirst' >= . | `ilast' < `ifirst'
        replace `ifirst' = . if `ilast' >= .

        * perform episode splitting at the marked failure times
        * -----------------------------------------------------

        tempvar xid evid nrec markfl firstn
        count if `touse'
        local n0 = r(N)
        * nrec = number of records an episode becomes: one more than the
        * number of failure times in its interior
        gen long `nrec' = cond(`ifirst'< .,`ilast'-`ifirst'+2, 1) in 1/`n0'
        gen long `xid' = _n in 1/`n0' /* identifier for split records */
        * firstn remembers which observations hold T, so that sorting on it
        * later brings T back into observations 1..nevent
        gen long `firstn' = _n in 1/`nevent'
        drop `busy' `il' `ih' `im' `ilow' `ihigh' `ilast'

        if "`preserve'" == "" & "`fast'" == "" {
            preserve
            * executed at the very end, so the preserved copy is discarded
            * only when everything above has succeeded
            local Done "restore, not"
        }

        compress `firstn' `nrec' `ifirst' `xid'
        noi SaveExpand = `nrec' in 1/`n0'
        if `n0' < _N {
            * presumably SaveExpand appends the copies after the original
            * observations; they must not carry an index into T
            local n1 = `n0'+1
            replace `firstn' = . in `n1' / l
        }
        sort `xid'

        * mark episodes first=1, last=2, inbetween=0
        * NOTE(review): nrec is missing outside 1/n0 and a missing value
        * satisfies nrec > 1, so records with missing xid are marked as
        * well -- confirm this is intended
        by `xid' : gen byte `markfl' = 1*(_n==1) + 2*(_n==_N) if `nrec' > 1

        * mark episode with nr of failure time being split on
        by `xid' : gen long `evid' = `ifirst' + _n - 1

        * all split episodes are ended by non-failure, marked -1 for now
        by `xid' : replace _d = -1 if _n < _N

        * ensure that T is correctly indexed (sorted)
        sort `firstn'

        * replace _t0 in all but the first record within xid
        replace _t0 = `T'[`evid'-1] if `markfl'==0 | `markfl'==2

        * replace _t in all but the last record within xid
        replace _t = `T'[`evid'] if `markfl'==0 | `markfl'==1

        * modify user variables, reset _d
        ModifyUserVars, `codesplit'

        * identifier for risk sets
        * ------------------------

        if "`riskset'" != "" {
            * mind = -_d orders failures before non-failures within a time
            * point, so the first record of a (stratum, _t) group is a
            * failure whenever the group contains one
            tempvar mind x
            gen byte `mind' = - _d
            bys `touse' `Strata' _t (`mind') : gen `x' = cond(_n==1 & _d==1, 1, .) if `touse'
            * the running sum numbers the failure times 1, 2, ...
            gen `rtype' `rname' = sum(`x') if `touse'
            * risk set is missing at times without failures
            by `touse' `Strata' _t (`mind') : replace `rname' = . if _d[1]==0
            label var `rname' "identifier of risk sets"
            compress `rname'
        }

        `Done'
    } /* quietly */
end
* ============================================================================
* AtNumList -- performs at() episode splitting
* ============================================================================
program define AtNumList
syntax newvarname(numeric) [=exp] [if], at(str) /*
*/ [ AFter(passthru) COdesplit(passthru) noPREserve Fast TRIM ]
local vname `varlist'
local vtype `typlist'
quietly {
marksample touse, novar
replace `touse' = 0 if _st == 0
if `"`exp'`after'"' != "" {
tempvar Base
After `Base' `exp' if `touse', `after'
}
else {
local Base 0
local range "range(>=0)" /* at() should be positive */
}
* parse at()
* ----------
* substitute max in -at-
local tmp : subinstr local at "max" "", count(local nch)
if `nch' > 0 {
tempvar basetime
gen `basetime' = _t + `Base'
summ `basetime' if _st, meanonly
local maxp = r(max)+1
local at : subinstr local at "max" "`maxp'" , all
drop `basetime'
}
numlist "`at'" , sort `range'
DropDup at : "`r(numlist)'"
* store at into at1, at2, ...
tokenize `at'
local nat : word count `at'
forv i = 1/`nat' {
local at`i' ``i''
}
* determine internal split times
* ------------------------------
* ifirst = min_j { _t0+Base > a_j } (missing if set is empty)
* ilast = max_j { _t +Base < a_j } (missing if set is empty)
tempvar ifirst ilast
gen long `ifirst' = .
gen long `ilast' = .
forv i = 1/`nat' {
replace `ifirst' = `i' if `ifirst' >= . & /*
*/ (float(`at`i'') > float(_t0+`Base')) & `touse'
}
forv i = `nat'(-1)1 {
replace `ilast' = `i' if `ilast' >= . & /*
*/ (float(`at`i'') < float(_t+`Base')) & `touse'
}
* perform episode splitting
* -------------------------
tempvar xid evid nrec markfl
gen long `nrec' = cond(`ilast'-`ifirst'< .,`ilast'-`ifirst'+2, 1) if `touse'
gen long `xid' = _n if `touse'
if "`preserve'" == "" & "`fast'" == "" {
preserve
local Done "restore, not"
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -