logit.src
来自「没有说明」· SRC 代码 · 共 1,297 行 · 第 1/3 页
SRC
1,297 行
/*
** logit.src - Multinomial Logit Analysis
**
**
** (C) Copyright 1988-1995 Aptech Systems, Inc.
** All Rights Reserved.
**
** This Software Product is PROPRIETARY SOURCE CODE OF APTECH
** SYSTEMS, INC. This File Header must accompany all files using
** any portion, in whole or in part, of this Source Code. In
** addition, the right to create such files is strictly limited by
** Section 2.A. of the GAUSS Applications License Agreement
** accompanying this Software Product.
**
** If you wish to distribute any portion of the proprietary Source
** Code, in whole or in part, you must first obtain written
** permission from Aptech Systems.
**
**-------------------**------------------**-------------------**-----------**
**-------------------**------------------**-------------------**-----------**
**
**
** Purpose: To estimate the multinomial logit model using a GAUSS data set.
** The details of the model are presented in the remarks section.
**
** Format: { vnames,b,vc,ndtran,pct,meanx,sdx,fit,df,tol }
** = LOGIT(dataset,depvar,indvars);
**
** Input: dataset -- string, name of data file
**
** depvar -- string, name of dependent variable
** - or -
** scalar, index of dependent variable.
**
** The value of depvar will be truncated before analysis.
** Thus, 1.4 is treated as category 1.
**
** indvars -- Kx1 character vector, names of independent variables.
** - or -
** Kx1 numeric vector, indices of independent variables.
**
** The program adds one variable for the constant term.
**
**
** Output: vnames -- a (K+2)x1 character vector containing the names of
** the variables in the model. The order is:
** depvar|"CONSTANT"|indvars.
**
** b -- an NPARM=(NCAT-1)*(K+1) vector of parameter estimates in
** the order: intercepts|var1|var2|...varK. For each
** variable the parameters are in the order comparing the
** first category to NCAT, the second to NCAT, ... to
** NCAT-1 to NCAT. See below for details.
**
** If errors are encountered a message will be sent to the
** error log. Also, b will contain a scalar error code. This
** code appears as missing unless it is translated with
** the command scalerr(b). The codes are defined as:
**
** 1 data file not found
** 2 found undefined variables
** 30 system singular
** 31 too few nonmissing observations.
** 32 too many parameters
** 71 number of categories of dependent variable is less
** than 2
** 72 one of the outcome categories has no cases
** 73 an independent variable has no variation
** 74 can't open file for predicted values
** 75 out of disk space
** 77 all cases were deleted
**
** vc -- NPARMxNPARM variance covariance matrix for the parameters
** in b.
**
** ndtran -- 2x1 vector of observations. Element 1 contains
** number of cases read from dataset; element 2
** contains number of cases left after deletion of
** missing cases controlled by __miss, it is the
** number of cases used in the analysis.
**
** pct -- the percent of cases in each of the outcome categories.
** Arranged in order lowest to highest.
**
** meanx -- the means based on nused cases of the independent
** variables in the order in indvars.
**
** sdx -- the standard deviations based on nused cases of the
** independent variables in the order in indvars.
**
** fit -- 4x1 vector of goodness of fit measures. Element 1 is
** the likelihood ratio chi-square assessing the overall
** fit of the model; element 2 is -2 times the log
** likelihood function evaluated at the estimated values;
** element 3 is -2 times the log likelihood function
** evaluated with the slopes fixed to zero; element 4 is
** the percentage of correct predictions from the model.
**
** df -- the degrees of freedom associated with lrx2.
**
** tol -- the tolerance reached. If convergence was obtained,
** tol must be less than __tol.
**
**
** Globals:
** Defaults are provided for the following global input
** variables. They can be ignored unless you need control
** over the other options provided by this procedure.
**
** WARNING: If you change the defaults in a command file, the new
** values will apply in the next program you run using LOGIT unless
** you change them back. This can be done by running QUANTSET.
**
** __weight -- when set to a name or column number
** of weight a weight variable a weighted
** logit will be performed.
**
** __miss -- global scalar, default 1.
**
** if 0, there are no missing values (fastest).
**
** if 1, do listwise deletion, drop an observation
** if there are any missing values among the independent
** and dependent variables.
**
** __output -- global scalar, default 1.
**
** if 1, sends results to the output device (including
** the screen).
**
** if 0, no information is sent to output.
**
** __range -- 2*1 vector. The range of record in data
** set used for analysis. The first element is the
** starting row index, the second element is the
** endding row index.
**
** Default is the whole dataset.
**
** __row -- global scalar, default 0.
**
** if 0, the number of rows to read per iteration of the
** read loop is calculated by the program.
**
** if not 0, the specified number of rows will be read.
**
** __tol -- global scalar controlling the iterations. __tol
** indicates the maximum difference between estimates
** of the coefficients in two adjacent iterations.
**
** _qrcatnm -- NCATx1 character vector of names of outcome categories
** - or -
** default, scalar 0 in which case names CAT1, CAT2, ...
** are used.
**
** _qrev -- global scalar, default 0.
**
** If 0, the parameters are in the order of comparing the
** first category to the NCAT-th category, the second to
** the NCAT-th category, ..., and the (NCAT-1)-th to the
** NCAT-th category.
**
** If 1, the above order is reversed. That is, the NCAT-th
** category to the first category, the (NCAT-1)-th to the
** first category, ... and the second to the first category.
**
** _qrfit -- global scalar, default 0.
**
** if 1, print detailed goodness of fit measures, including
** table of observed and predicted outcomes.
**
** if 0, only print chi-square, -2*log-likehood and percent
** correctly predicted.
**
** _qriter -- global scalar, default 0.
**
** if 0, do not print information on iterations.
**
** if 1, send detailed information on iterations to
** the screen but not to the output device.
**
** if 2, send detailed information on iterations to
** the output device.
**
** _qrpred -- global scalar, default 0.
**
** if 0, predicted values will not be written to disk.
**
** if not 0, predicted probabilities for each outcome
** category are written to file ^_qrpred with
** NCAT+1 variables. The first ncat are PRED1,PRED2,
** ...,PREDNCAT. The last variable is the variable
** defined by the variable depvar.
**
** _qrpredn -- string name of dataset for predicted values. The
** default name is "_qrpred".
**
** _qrstat -- global scalar, default 0.
**
** if 0, do not print descriptive statistics.
**
** if 1, print descriptive statistics.
**
** _qrmiter -- maximum number of iterations, default = 1000.
**
**
** Remarks: See the manual for details on the model.
**
** Library: QUANTAL
**
** See Also: ORDERED, PROBIT, PSNREG
*/
#include gauss.ext
#include quantal.ext
proc(10) = LOGIT(dataset,depvar,indvars);
local bvec,bnew,bj,bmat,bstrt,cc,covar, dash,bchng,df,dl,err, errmsg,
ypred,pct,fin,pctlast,fl,fmt,g,i1,i2,ii,ij,iter, iv,j1,j2,nparms,
ll,lrx2,lrxp,m,mask,maxx,minx,meanx,nb, ncatm1,nivar,nr,range,
nused,obspred,omat,omat2,pctall,pml, s,sd,sdml,spc,sttime,success,
sx,tml,tm,tol,title,tzu,resid, vc,vcml,vv,xx,xy,ydum,mcfr2,cur2,
llfull,llrest,lastobs, bprt,madr2,prednm,oldnfmt,oldcfmt,varindx,
nd,start,counter, fpred,nout,oldtrap,nm,ncat,ylbl,xlbl,ycat1,x,
ynum,dta,count, readdisk,ndtran,inm,xb,cv,wx,vtype,iswt,wlbl,
windx, ky;
local wnused,wt;
/* Initialize */
clear nused,readdisk,inm;
oldnfmt = formatnv("");
oldcfmt = formatcv("");
open fin = ^dataset;
if fin == -1;
errmsg = "ERROR: Can't open file " $+ dataset $+ ".";
goto errout(error(1));
endif;
clear wnused;
sttime = hsec;
{ nm, varindx } = indices(dataset, depvar|indvars);
if scalerr(nm);
goto errout(nm);
endif;
ylbl = nm[1];
xlbl = "CONSTANT"|trimr(nm,1,0);
nivar = rows(nm);
if __weight $== 0;
wlbl = "";
windx = 0;
iswt = 0;
else; /* INCLUDES __WEIGHT IF DEFINED */
{ wlbl, windx } = indices(dataset, __weight);
if scalerr(wlbl);
goto errout(wlbl);
endif;
iswt = 1;
nm = nm|wlbl;
varindx = varindx|windx;
endif;
{ nm,vtype } = nametype(nm,__vtype);
vtype = vtype[1];
_qrycat = miss(0,0);
{ nr,start,counter,lastobs } = _rngchk(dataset,__range);
range = lastobs - start + 1;
ndtran = zeros(2,1);
ndtran[1] = range;
call seekr(fin,start);
nd = 0;
count = counter;
do while count < lastobs;
readdisk = readdisk+1;
dta = readr(fin,nr);
count = count + rows(dta);
if count > lastobs;
dta = trimr(dta,0,count-lastobs);
endif;
if __miss == 1;
dta = packr(dta[.,varindx]);
else;
dta = dta[.,varindx];
endif;
nd = rows(dta);
if nd == 0;
continue;
endif;
ndtran[2] = ndtran[2] + nd;
if iswt;
wt = dta[.,cols(dta)];
else;
wt = ones(rows(dta),1);
endif;
wnused = wnused+sumc(wt);
ynum = dta[.,1];
if vtype;
ynum = trunc(ynum);
endif;
if ismiss(ynum);
errmsg = "ERROR: Missing observations in dependent variable.";
goto errout(error(70));
endif;
if scalmiss(_qrycat);
_qrycat = unique(ynum,vtype);
else;
_qrycat = union(ynum,_qrycat,vtype);
endif;
endo;
nused = ndtran[2];
if nused == 0;
errmsg = "ERROR: No observations left after deleting missing values.";
goto errout(error(77));
endif;
readdisk = readdisk>1;
ncat = rows(_qrycat);
if ncat le 1;
errmsg = "ERROR: Too few dependent categories.";
goto errout(error(71));
endif;
ncatm1 = ncat-1;
ycat1 = _qrycat[1:ncatm1]';
obspred = zeros(ncat*ncat,1);
if vtype == 0;
_qrtmp = _qrycat;
elseif rows(_qrcatnm) .ne ncat;
_qrtmp = 0 $+ "Cat_"$+ftocv(_qrycat,1,0);
else;
_qrtmp = _qrcatnm;
inm = 1;
endif;
nparms = nivar*ncatm1; /* number of parameters */
if (nparms*nparms) gt maxvec;
errmsg = "ERROR: Too many parameters. Increase the value of GAUSS "\
"global variable maxvec then try again.";
goto errout(error(32));
endif;
if readdisk; /* only reset reader if data won't fit in memory */
minx = (1E+50)*ones(nivar,1);
maxx = -minx;
clear xx,xy,pct,meanx,count;
call seekr(fin,start);
count = counter;
do while count < lastobs;
dta = readr(fin,nr);
count = count + rows(dta);
if count > lastobs;
dta = trimr(dta,0,count-lastobs);
endif;
if __miss == 1;
dta = packr(dta[.,varindx]);
else;
dta = dta[.,varindx];
endif;
if scalmiss(dta);
continue;
endif;
if iswt;
wt = dta[.,cols(dta)];
dta = trimr(dta',0,1)';
else;
wt = ones(rows(dta),1);
endif;
x = ones(rows(dta),1)~trimr(dta',1,0)';
wx = x.*wt;
if vtype;
ynum = trunc(dta[.,1]) .== ycat1;
else;
ynum = dta[.,1] .$== ycat1;
endif;
xx = xx + wx'*x;
xy = xy + wx'*ynum;
pct = pct+sumc(wt.*ynum);
meanx = meanx+sumc(wx);
minx = minc((minx~minc(x))');
maxx = maxc((maxx~maxc(x))');
endo;
clear dta;
else;
/* prepare data for logit analysis */
ynum = dta[.,1];
if vtype;
ynum = trunc(ynum);
ydum = ynum .== ycat1;
else;
ydum = ynum .$== ycat1;
endif;
x = ones(rows(dta),1)~trimr(dta',1,0)';
if iswt;
x = trimr(x',0,1)';
endif;
wx = wt.*x;
clear dta;
pct = sumc(wt.*ydum);
xx = wx'*x;
xy = wx'*ydum;
meanx = sumc(wx);
minx = minc(x);
maxx = maxc(x);
endif;
if nused < nivar+1;
errmsg = "ERROR: Too few nonmissing observations.";
goto errout(error(31));
endif;
pct = pct/wnused; /* % in categories 1 to ncatm1 */
pctlast = 1-sumc(pct); /* % in category ncat */
pctall = pct|pctlast;
if minc(pctall) le .00000001;
errmsg = "ERROR: An outcome category has no observations.";
goto errout(error(72));
endif;
meanx = meanx/wnused; /* mean of indep vars */
sd = sqrt((diag(xx)./(wnused))-(meanx.*meanx));
if __output;
call header("LOGIT",dataset,_qr_ver);
print;
print "CASES PROCESSED BY LOGIT:";
print;
print " " ftos(nused,"%-*.*lf",1,0) " cases were kept out of "
ftos(range,"%-*.*lf",1,0) " in file.";
if iswt;
print;
print "WEIGHTING IS IN EFFECT: ";
print;
print (" Computations have used the weight variable: " $+ wlbl);
endif;
print;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?