cluster.ado

来自「是一个经济学管理应用软件 很难找的 但是经济学学生又必须用到」· ADO 代码 · 共 2,302 行 · 第 1/5 页

ADO
2,302
字号
*! version 1.10.3  22feb2005
program define cluster
	local vv : display "version " string(_caller()) ", missing:"
	/* because of change of "hamman" to "hamann" and associated change
	   to _cluster etc. in the executable, the following line will
	   complain if the executable has not been updated sufficiently
	*/
	version 8.1, born(01aug2003) missing
	/* but then we set version back again */
	version 7.0, missing

	gettoken subcmd 0 : 0 , parse(" ,")
	local subcmd = lower(`"`subcmd'"')

	if `"`subcmd'"' == "" | `"`subcmd'"' == "," {
		di as error "must specify a cluster subcommand"
		exit 198
	}

	if `"$S_Cl_stub"' == "" {
		/* if stub not already preset by user set it to default */
		global S_Cl_stub "_cl"
		local resetCl true
	}
	else {
		capture confirm name $S_Cl_stub
		if _rc {
			di as error "global S_Cl_stub contains invalid name"
			exit _rc
		}
	}

	capture noisily `vv' ClustWork , subcmd(`"`subcmd'"') rest(`"`0'"')

	local therc = _rc

	if `therc' {
		ClustClean /* clean up chars from any partial cluster runs */
	}
	if "`resetCl'" != "" { /* reset global stub if we set it */
		global S_Cl_stub
	}
	exit `therc'
end


program define ClustWork
	local vv : display "version " string(_caller()) ", missing:"
	version 7.0, missing
	syntax [, subcmd(str) rest(str) ]
	local len = length(`"`subcmd'"')
	if `"`subcmd'"' == substr("kmeans",1,max(1,`len')) {
		ClustKmeans "means" `rest'
		exit
	}
	if `"`subcmd'"' == substr("kmedians",1,max(4,`len')) {
		ClustKmeans "medians" `rest'
		exit
	}
	if `"`subcmd'"' == substr("singlelinkage",1,max(1,`len')) {
		ClustLink "single" `rest'
		exit
	}
	if `"`subcmd'"' == substr("averagelinkage",1,max(1,`len')) {
		ClustLink "average" `rest'
		exit
	}
	if `"`subcmd'"' == substr("completelinkage",1,max(1,`len')) {
		ClustLink "complete" `rest'
		exit
	}
	if `"`subcmd'"' == substr("centroidlinkage",1,max(4,`len')) {
		ClustLink "centroid" `rest'
		exit
	}
	if `"`subcmd'"' == substr("medianlinkage",1,max(3,`len')) {
		ClustLink "median" `rest'
		exit
	}
	if `"`subcmd'"' == substr("waveragelinkage",1,max(3,`len')) {
		ClustLink "waverage" `rest'
		exit
	}
	if `"`subcmd'"' == substr("wardslinkage",1,max(4,`len')) {
		ClustLink "wards" `rest'
		exit
	}
	if `"`subcmd'"' == substr("dendrogram",1,max(4,`len')) | /*
			*/ `"`subcmd'"' == substr("tree",1,max(2,`len')) {
		`vv' cluster_tree `rest'
		exit
	}
	if `"`subcmd'"' == substr("generate",1,max(3,`len')) {
		ClustGen `rest'
		exit
	}
	if `"`subcmd'"' == "dir" {
		ClustDir `rest'
		exit
	}
	if `"`subcmd'"' == "list" {
		ClustList `rest'
		exit
	}
	if `"`subcmd'"' == substr("notes",1,max(4,`len')) {
		ClustNote `rest'
		exit
	}
	if `"`subcmd'"' == "drop" {
		ClustDrop `rest'
		exit
	}
	if `"`subcmd'"' == "use" {
		ClustUse `rest'
		exit
	}
	if `"`subcmd'"' == "rename" {
		ClustRename `rest'
		exit
	}
	if `"`subcmd'"' == "renamevar" {
		ClustRenameVar `rest'
		exit
	}
	if `"`subcmd'"' == "query" {
		ClustQuery `rest'
		exit
	}
	if `"`subcmd'"' == "set" {
		ClustSet `rest'
		exit
	}
	if `"`subcmd'"' == substr("delete",1,max(3,`len')) {
		ClustDel `rest'
		exit
	}
	if `"`subcmd'"' == substr("parsedistance",1,max(9,`len')) {
		if trim(`"`rest'"') == "" {
			error 198
		}
		ParseDist , distance(`rest')
		exit
	}
	/* else call user defined command */
	cluster_`subcmd' `rest'
end


* ClustKmeans -- Kmeans and Kmedians cluster analysis (means and medians)
*
* name() -- cluster name.
* generate() -- name of variable to generate with cluster solution (group var).
* start() -- determines the k starting center values, see ParseStart for info.
* iterate() -- maximum iterations allowed.
* keepcenters -- appends the final center points (means or medians) for the
*                groups to the end of the dataset (adding k obs. to the data).
*
program define ClustKmeans
	gettoken typeflag 0 : 0
	if "`typeflag'" == "means" {
		local cmd `"cluster kmeans `0'"'
	}
	else if "`typeflag'" == "medians" {
		local cmd `"cluster kmedians `0'"'
	}
	syntax [varlist(numeric)] [if] [in], k(numlist int max=1 >0) /*
	*/ [ Name(str) GENerate(str) Start(str) ITERate(integer 10000) /*
	*/ KEEPcenters MEAsure(str) * ]

	marksample touse

	if `"`measure'"' != "" {
		if `"`options'"' != "" {
			opts_exclusive `"`options' measure(`measure')"'
		}
		local dop `"distance(`measure')"'
	}
	if `"`options'"' != "" {
		local dop `"distance(`options')"'
	}
	ParseDist , `dop'
	local dist `s(dist)'
	local dstargs `s(darg)'
	local dtype `s(dtype)'
	local isbin `s(binary)'
	local drange `s(drange)'

	local realN = _N           /* remember the true number of obs */
	local sortedby : sortedby  /* and how we were sorted */
	tempvar realid
	gen long `realid' = _n

	/* check for errors in start() option */
	ParseStart `k' `touse' `start'
	local stway `s(startway)'
	local stopt `s(startopt)'
	local stfull `s(start)'

	preserve /* in case of break--temporarily will be adding obs to end */

	/* for storing means (and medians if ties occur) at the end of the
	   data we need to recast byte, int, long, and float to
	   double.  When we drop the extra obs we will recast back. */
	local vtcnt 0
	foreach var of local varlist {
		local vtype : type `var'
		if "`vtype'" != "double" {
			recast double `var'
			local vtcnt = `vtcnt' + 1
			local vtdo`vtcnt' recast `vtype' `var'
		}
	}

	/* set up k seed obs at bottom of data taking care of touse var etc. */
	DoStart `k' `touse' `realid' `typeflag' `varlist' , /*
						*/ `stway' `stopt' `isbin'

	ClustSet `name', add type(partition) method(k`typeflag') other(k `k')/*
		*/ `dtype'(`dist'`dstargs') other(start `stfull') /*
		*/ other(range `drange')
	local cname `r(name)'

	capture noi {
		if `"`generate'"' == "" { /* default varname is cluster name */
			capture confirm new var `cname'
			if _rc {
				di as error /*
				   */ "unable to create new variable `cname';"
				di as error /*
			    */ "use the generate() option to specify a varname"
				exit _rc
			}
			local generate `cname'
		}
		else {
			confirm new var `generate'
		}
	}
	if _rc {
		ClustDrop `cname'
		exit _rc
	}

	capture noi {
		_cluster `varlist' if `touse', kmeans(`k') `dist'`dstargs' /*
			*/ generate(`generate') `typeflag' iterate(`iterate')
		ReformatDisp `generate'

		ClustSet `cname', var(group `generate') other(cmd `cmd') /*
			*/ other(varlist `varlist')

	}
	if _rc {
		local therc = _rc
		capture ClustDrop `cname'
		exit `therc'
	}

	if "`keepcenters'" != "" { /* keep the extra last k obs. */
		local Np1 = `realN' + 1
		qui replace `realid' = _n in `Np1'/l /* keep last k in order */
		forvalues i = 1/`vtcnt' {
			/* if possible put vars back to original type */
			capture `vtdo`i''
		}
		sort `sortedby' `realid' /* get back into sort order */
	}
	else { /* Drop the k seed obs. we added, change storage types back,
					and get back in original order */
		qui drop if `realid' >= .
		forvalues i = 1/`vtcnt' {
			`vtdo`i''
		}
		sort `sortedby' `realid' /* get back into sort order */
	}

	restore, not /* we need to keep our generated vars if all went well */
end


* ClustLink -- various Linkage Hierarchical Cluster Analyses
*
*	linkages: single, complete, average, centroid, median, waverage, wards
*
program define ClustLink
	gettoken method 0 : 0 , parse(" ,")
	local cmd `"cluster `method'linkage `0'"'
	syntax [varlist(numeric)] [if] [in] [, Name(str) GENerate(str) ///
						MEAsure(str) * ]

	marksample touse
	qui count if `touse'
	if r(N) <= 1 {
		di as err "insufficient observations"
		exit 2001
	}

	if `"`measure'"' != "" {
		if `"`options'"' != "" {
			opts_exclusive `"`options' measure(`measure')"'
		}
		local dop `"distance(`measure')"'
	}
	if `"`options'"' != "" {
		local dop `"distance(`options')"'
	}
	ParseDist , `dop' method(`method')
	local dist `s(dist)'
	local dstargs `s(darg)'
	local dtype `s(dtype)'
	local drange `s(drange)'

	ClustSet `name' , add type(hierarchical) method(`method') /*
			*/ `dtype'(`dist'`dstargs') other(range `drange')
	local cname `r(name)'

	capture noi ParseGen `cname' `generate'
	if _rc {
		ClustDrop `cname'
		exit _rc
	}
	local idvar `s(id)'
	local ordvar `s(ord)'
	local hgtvar `s(hgt)'
	local phtvar `s(pht)'
	local varops `"idvar(`idvar') ordvar(`ordvar') hgtvar(`hgtvar')"'
	local all `s(all)'
	if "`method'" == "single" {
		if "`all'" == "all" {
			local pivar `s(pi)'
			local lamvar `s(lam)'
			local varops /*
				*/ `"`varops' pivar(`pivar') lamvar(`lamvar')"'
		}
		else {
			tempvar tmp1 tmp2
			local varops `"`varops' pivar(`tmp1') lamvar(`tmp2')"'
		}
	}
	else {
		if "`all'" != "" {
			di as error "invalid generate() option"
			exit 198
		}
		local varops `"`varops' phtvar(`phtvar')"'
	}

	capture noi {
		_cluster `varlist' if `touse', `method' `dist'`dstargs' `varops'

		ReformatDisp `idvar' `ordvar' `hgtvar'

		if "`method'" == "single" {
			ClustSet `cname', var(id `idvar') var(order `ordvar') /*
							*/ var(height `hgtvar')

			if "`all'" == "all" {
				ReformatDisp `pivar' `lamvar'
				ClustSet `cname', var(pi `pivar') var(lambda `lamvar')
			}
		}
		else {
			CheckPHTvar `phtvar'
			if `r(nopht)' {
				qui drop `phtvar'
				ClustSet `cname', var(id `idvar') /*
					*/ var(order `ordvar') /*
					*/ var(height `hgtvar')
			}
			else {
				ClustSet `cname', var(id `idvar') /*
					*/ var(order `ordvar') /*
					*/ var(real_height `hgtvar') /*
					*/ var(pseudo_height `phtvar')
			}
		}

		ClustSet `cname', other(cmd `cmd') other(varlist `varlist')
	}
	if _rc {
		local therc = _rc
		capture ClustDrop `cname'
		exit `therc'
	}
end


* ClustGen -- generate variable(s) that summarize in one way or another the
*             information from a previous cluster analysis.
*
* To generate a group var (or vars) after a hier. cluster analysis by telling
* how many groups to split into.
*
*        cluster gen[erate] {name|stub} = gr[oups](numlist) [, name(clname)
*                                     ties({ error | skip | fewer | more }) ]
*
* Also have alias -ties(less)- for -ties(fewer)- for backwards compatibility
*
* To create a group var after a hier. cluster analysis by telling the value
* (dendrogram height) at which to cut.
*
*        cluster gen[erate] name = cut(numlist) [, name(clname) ]
*
program define ClustGen
	local orig0 `0'
	gettoken stub 0 : 0, parse(" =(")
	gettoken eqsign 0 : 0, parse(" =(")
	if `"`eqsign'"' != "=" {
		error 198
	}
	gettoken fcn 0 : 0, parse(" =(")
	gettoken fcnargs 0 : 0, parse(" ,") match(paren)
	if `"`paren'"' != "(" {
		error 198
	}
	local len = length(`"`fcn'"')

	ClustQuery
	local clnames `r(names)'
	if "`clnames'" == "" {
		di as error "no cluster solutions currently defined"
		exit 198
	}

  * ============================ groups() =================================

	/* -cluster generate <stub> = groups(<fcnargs>) ...- */
	if `"`fcn'"' == substr("groups",1,max(2,`len')) {
		syntax [, Name(str) Ties(str) ]

		if `"`ties'"' == "" {/* default -- error if ties encountered */
			local ties error
		}
		else {
			local ties = lower(`"`ties'"')
			local tielen = length(`"`ties'"')
			if `"`ties'"' == substr("error",1,max(1,`tielen')) {
				local ties error
			}
			if `"`ties'"' == substr("skip",1,max(1,`tielen')) {
				local ties skip
			}
			if `"`ties'"' == substr("fewer",1,max(1,`tielen')) {
				local ties less		// sic
			}
			if `"`ties'"' == substr("less",1,max(1,`tielen')) {
				local ties less
			}
			if `"`ties'"' == substr("more",1,max(1,`tielen')) {
				local ties more
			}
		}

		if `"`name'"' == "" {
			local name : word 1 of `clnames'
		}
		else {
			local wcnt : word count `name'
			if `wcnt' != 1 {
				error 198
			}

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?