📄 duplicates.ado
字号:
*! version 1.3.1 01apr2005
program define duplicates, sortpreserve rclass
	version 8

	* duplicates: report on, show, tag or drop observations that share
	* identical values on a varlist (all variables when none is given).
	*
	* Subcommands (abbreviations accepted, except for drop):
	*   report    tabulate number of copies, observations and surplus
	*   examples  list one example observation per duplicated group
	*   list      list every observation belonging to a duplicated group
	*   browse    browse every observation belonging to a duplicated group
	*   tag       generate() a new variable holding number of copies - 1
	*   drop      keep only the first observation of each duplicated group;
	*             the force option is required when a varlist is given
	*
	* Returned results:
	*   r(N)             number of observations selected by if and in
	*   r(unique_value)  report only: number of distinct value combinations
	*
	* Exits with 198 on a syntax problem and 2000 when no observations
	* are selected.

	* identify subcommand
	gettoken cmd 0 : 0, parse(" ,")
	local l = length("`cmd'")

	if `l' == 0 {
		di "{err}subcommand needed; see help on {help duplicates##|_new:duplicates}"
		exit 198
	}

	* any leading abbreviation of a subcommand name is expanded to the
	* full name; drop must be typed in full
	if substr("report",1,max(1,`l')) == "`cmd'" {
		local cmd "report"
	}
	else if substr("examples",1,max(1,`l')) == "`cmd'" {
		local cmd "examples"
	}
	else if substr("list",1,max(1,`l')) == "`cmd'" {
		local cmd "list"
	}
	else if substr("browse",1,max(1,`l')) == "`cmd'" {
		local cmd "browse"
	}
	else if substr("tag",1,max(1,`l')) == "`cmd'" {
		local cmd "tag"
	}
	else if "drop" == "`cmd'" {
		* OK
	}
	else {
		di "{err}illegal {cmd}duplicates {err}subcommand"
		exit 198
	}

	* check rest of syntax
	* in every branch an empty varlist is replaced by all variables, minus
	* the name held in local _sortindex (presumably the temporary variable
	* added by sortpreserve), and vartext holds the text for the header line
	if "`cmd'" == "drop" {
		* a varlist without the force option is refused outright
		capture syntax varlist [if] [in]
		if _rc == 0 {
			di "{err}force option required with " /*
			*/ "{cmd}duplicates drop {it}varlist{rm}"
			exit 198
		}

		* either varlist plus force, or no varlist and no options at all
		capture syntax varlist [if] [in], force
		if _rc {
			syntax [if] [in]
			unab varlist : _all
			local varlist : /*
			*/ subinstr local varlist "`_sortindex'" ""
			local vartext "{txt} all variables"
		}
		else local vartext "{res} `varlist'"
	}
	else if "`cmd'" == "tag" {
		syntax [varlist(default=none)] [if] [in], Generate(str)

		capture confirm new variable `generate'
		if _rc {
			di as err "generate() must specify new variable"
			exit _rc
		}

		if "`varlist'" == "" {
			unab varlist : _all
			local varlist : /*
			*/ subinstr local varlist "`_sortindex'" ""
			local vartext "{txt} all variables"
		}
		else local vartext "{res} `varlist'"
	}
	else if "`cmd'" == "browse" {
		syntax [varlist(default=none)] [if] [in] [ , noLabel ]

		if "`varlist'" == "" {
			unab varlist : _all
			local varlist : /*
			*/ subinstr local varlist "`_sortindex'" ""
			local vartext "{txt} all variables"
		}
		else local vartext "{res} `varlist'"
	}
	else {
		* report, examples and list: any options are collected in local
		* options and handed to list by examples and list below
		syntax [varlist(default=none)] [if] [in] [ , * ]

		if "`varlist'" == "" {
			unab varlist : _all
			local varlist : /*
			*/ subinstr local varlist "`_sortindex'" ""
			local vartext "{txt} all variables"
		}
		else local vartext "{res} `varlist'"
	}

	* duplicates with some values missing might be of interest
	marksample touse, novarlist

	* # of observations
	qui count if `touse'
	return scalar N = r(N)
	if r(N) == 0 {
		error 2000
	}

	tempvar order dgroup Ngroup example freq surplus uniq

	/*
	   order    1 up      _n when called
	   dgroup   0         if unique on varlist (not a "duplicated" group)
	            1 up      labels groups which share identical values on varlist
	   Ngroup   1         if unique on varlist
	            2 up      is # in each dgroup
	   example  1         to show if showing examples -- and to keep if -drop-
	            0         to drop if -drop-
	   freq     #         # in each group
	   surplus  #         # of surplus observations
	   uniq     1         first occurrence by varlist
	*/

	di _n "{p 0 4}{txt}Duplicates in terms of `vartext'{p_end}"

	gen long `order' = _n
	bysort `touse' `varlist' : gen long `Ngroup' = _N

	* tag: number of copies beyond the first, missing outside the sample
	qui if "`cmd'" == "tag" {
		gen long `generate' = `Ngroup' - 1 if `touse'
		compress `generate'
		exit 0
	}

	if "`cmd'" == "report" {
		* freq is the number of observations whose group has Ngroup copies;
		* dividing by Ngroup gives the number of such groups, so surplus is
		* the number of observations beyond one per group
		bysort `touse' `Ngroup' : gen long `freq' = _N
		by `touse' `Ngroup' : gen long `surplus' = _N - _N / `Ngroup'
		label var `Ngroup' "copies"
		label var `freq' "observations"
		label var `surplus' "surplus"
		tabdisp `Ngroup' if `touse', cell(`freq' `surplus')

		* count the first occurrence of each value combination in the sample
		qui bysort `touse' `varlist' (`order') : /*
		*/ gen byte `uniq' = 1 if _n == 1 & `touse'
		qui count if `uniq' == 1
		return scalar unique_value = r(N)
		exit 0
	}

	* example flags the first observation of each duplicated group
	bysort `touse' `varlist' (`order') : /*
	*/ gen byte `example' = (_N > 1) * (_n == 1) * `touse'

	* dgroup numbers the duplicated groups 1 up and is 0 elsewhere; it is
	* stored as long, like the other counters, so that group numbers stay
	* exact however many groups there are
	qui by `touse' `varlist' : gen long `dgroup' = `example'[1]
	qui replace `dgroup' = `dgroup' * sum(`example')
	char `dgroup'[varname] "group:"
	sort `dgroup' `order'

	* bail out now if no duplicates
	su `dgroup', meanonly
	if `r(max)' == 0 {
		di _n as txt "(0 observations are duplicates)"
		exit 0
	}

	* r(max) is the number of duplicated groups; the group column is only
	* shown by examples and list when there is more than one group
	if "`cmd'" == "examples" {
		char `order'[varname] "e.g. obs:"
		char `Ngroup'[varname] "#"
		if `r(max)' > 1 {
			list `dgroup' `Ngroup' `order' `varlist' /*
			*/ if `example', subvarname noobs `options'
		}
		else {
			list `Ngroup' `order' `varlist' if `example', /*
			*/ subvarname noobs `options'
		}
	}
	else if "`cmd'" == "list" {
		char `order'[varname] "obs:"
		if `r(max)' > 1 {
			list `dgroup' `order' `varlist' if `dgroup', /*
			*/ subvarname noobs `options'
		}
		else {
			list `order' `varlist' if `dgroup', /*
			*/ subvarname noobs `options'
		}
	}
	else if "`cmd'" == "browse" {
		* back to the original order before browsing
		sort `order'
		browse `varlist' if `dgroup', `label'
	}
	else if "`cmd'" == "drop" {
		* keep the first observation of every duplicated group
		di
		noisily drop if !`example' & `dgroup'
	}
end
/* end */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -