logit.src

来自「没有说明」· SRC 代码 · 共 1,297 行 · 第 1/3 页

SRC
1,297
字号
/*
** logit.src - Multinomial Logit Analysis
**
**
** (C) Copyright 1988-1995  Aptech Systems, Inc.
** All Rights Reserved.
**
** This Software Product is PROPRIETARY SOURCE CODE OF APTECH
** SYSTEMS, INC.    This File Header must accompany all files using
** any portion, in whole or in part, of this Source Code.   In
** addition, the right to create such files is strictly limited by
** Section 2.A. of the GAUSS Applications License Agreement
** accompanying this Software Product.
**
** If you wish to distribute any portion of the proprietary Source
** Code, in whole or in part, you must first obtain written
** permission from Aptech Systems.
**
**-------------------**------------------**-------------------**-----------**
**-------------------**------------------**-------------------**-----------**
**
**
** Purpose:  To estimate the multinomial logit model using a GAUSS data set.
**           The details of the model are presented in the remarks section.
**
** Format:   { vnames,b,vc,ndtran,pct,meanx,sdx,fit,df,tol }
**             = LOGIT(dataset,depvar,indvars);
**
** Input:    dataset -- string, name of data file
**
**           depvar -- string, name of dependent variable
**                        - or -
**                     scalar, index of dependent variable.
**
**                     The value of depvar will be truncated before analysis.
**                     Thus, 1.4 is treated as category 1.
**
**           indvars -- Kx1 character vector, names of independent variables.
**                         - or -
**                      Kx1 numeric vector, indices of independent variables.
**
**                      The program adds one variable for the constant term.
**
**
** Output:   vnames -- a (K+2)x1 character vector containing the names of
**                     the variables in the model.  The order is:
**                     depvar|"CONSTANT"|indvars.
**
**           b -- an NPARM=(NCAT-1)*(K+1) vector of parameter estimates in
**                the order:  intercepts|var1|var2|...varK.  For each
**                variable the parameters are in the order comparing the
**                first category to NCAT, the second to NCAT, ... to
**                NCAT-1 to NCAT.  See below for details.
**
**                If errors are encountered a message will be sent to the
**                error log.  Also, b will contain a scalar error code.  This
**                code appears as missing unless it is translated with
**                the command scalerr(b).  The codes are defined as:
**
**                      1   data file not found
**                      2   found undefined variables
**                      30  system singular
**                      31  too few nonmissing observations.
**                      32  too many parameters
**                      71  number of categories of dependent variable is less
**                          than 2
**                      72  one of the outcome categories has no cases
**                      73  an independent variable has no variation
**                      74  can't open file for predicted values
**                      75  out of disk space
**                      77  all cases were deleted
**
**           vc -- NPARMxNPARM variance covariance matrix for the parameters
**                 in b.
**
**           ndtran -- 2x1 vector of observations.  Element 1 contains
**                     number of cases read from dataset; element 2
**                     contains number of cases left after deletion of
**                     missing cases controlled by __miss, it is the
**                     number of cases used in the analysis.
**
**           pct -- the percent of cases in each of the outcome categories.
**                  Arranged in order lowest to highest.
**
**           meanx -- the means based on nused cases of the independent
**                    variables in the order in indvars.
**
**           sdx -- the standard deviations based on nused cases of the
**                  independent variables in the order in indvars.
**
**           fit -- 4x1 vector of goodness of fit measures.  Element 1 is
**                  the likelihood ratio chi-square assessing the overall
**                  fit of the model; element 2 is -2 times the log
**                  likelihood function evaluated at the estimated values;
**                  element 3 is -2 times the log likelihood function
**                  evaluated with the slopes fixed to zero; element 4 is
**                  the percentage of correct predictions from the model.
**
**           df -- the degrees of freedom associated with lrx2.
**
**           tol -- the tolerance reached.  If convergence was obtained,
**                  tol must be less than __tol.
**
**
** Globals:
**           Defaults are provided for the following global input
**           variables.  They can be ignored unless you need control
**           over the other options provided by this procedure.
**
**           WARNING:  If you change the defaults in a command file, the new
**           values will apply in the next program you run using LOGIT unless
**           you change them back.  This can be done by running QUANTSET.
**
**           __weight -- when set to a name or column number
**                       of weight a weight variable a weighted
**                       logit will be performed.
**
**           __miss -- global scalar, default 1.
**
**                     if 0, there are no missing values (fastest).
**
**                     if 1, do listwise deletion, drop an observation
**                     if there are any missing values among the independent
**                     and dependent variables.
**
**           __output -- global scalar, default 1.
**
**                       if 1, sends results to the output device (including
**                       the screen).
**
**                       if 0, no information is sent to output.
**
**           __range  -- 2*1 vector.  The range of record in data
**                       set used for analysis.  The first element is the
**                       starting row index, the second element is the
**                       endding row index.
**
**                       Default is the whole dataset.
**
**           __row -- global scalar, default 0.
**
**                    if 0, the number of rows to read per iteration of the
**                    read loop is calculated by the program.
**
**                    if not 0, the specified number of rows will be read.
**
**           __tol -- global scalar controlling the iterations.  __tol
**                    indicates the maximum difference between estimates
**                    of the coefficients in two adjacent iterations.
**
**          _qrcatnm -- NCATx1 character vector of names of outcome categories
**                       - or -
**                      default, scalar 0 in which case names CAT1, CAT2, ...
**                      are used.
**
**          _qrev  -- global scalar, default 0.
**
**                If 0, the parameters are in the order of comparing the
**                first category to the NCAT-th category, the second to
**                the NCAT-th category, ..., and the (NCAT-1)-th to the
**                NCAT-th category.
**
**                If 1, the above order is reversed. That is, the NCAT-th
**                category to the first category, the (NCAT-1)-th to the
**                first category, ... and the second to the first category.
**
**          _qrfit -- global scalar, default 0.
**
**                    if 1, print detailed goodness of fit measures, including
**                    table of observed and predicted outcomes.
**
**                    if 0, only print chi-square, -2*log-likehood and percent
**                    correctly predicted.
**
**          _qriter -- global scalar, default 0.
**
**                     if 0, do not print information on iterations.
**
**                     if 1, send detailed information on iterations to
**                     the screen but not to the output device.
**
**                     if 2, send detailed information on iterations to
**                     the output device.
**
**          _qrpred -- global scalar, default 0.
**
**                     if 0, predicted values will not be written to disk.
**
**                     if not 0, predicted probabilities for each outcome
**                     category are written to file ^_qrpred with
**                     NCAT+1 variables.  The first ncat are PRED1,PRED2,
**                     ...,PREDNCAT.  The last variable is the variable
**                     defined by the variable depvar.
**
**          _qrpredn -- string name of dataset for predicted values.  The
**                      default name is "_qrpred".
**
**          _qrstat -- global scalar, default 0.
**
**                     if 0, do not print descriptive statistics.
**
**                     if 1, print descriptive statistics.
**
**          _qrmiter -- maximum number of iterations, default = 1000.
**
**
** Remarks:  See the manual for details on the model.
**
** Library:  QUANTAL
**
** See Also: ORDERED, PROBIT, PSNREG
*/

#include gauss.ext
#include quantal.ext

proc(10) = LOGIT(dataset,depvar,indvars);

    local bvec,bnew,bj,bmat,bstrt,cc,covar, dash,bchng,df,dl,err, errmsg,
        ypred,pct,fin,pctlast,fl,fmt,g,i1,i2,ii,ij,iter, iv,j1,j2,nparms,
        ll,lrx2,lrxp,m,mask,maxx,minx,meanx,nb, ncatm1,nivar,nr,range,
        nused,obspred,omat,omat2,pctall,pml, s,sd,sdml,spc,sttime,success,
        sx,tml,tm,tol,title,tzu,resid, vc,vcml,vv,xx,xy,ydum,mcfr2,cur2,
        llfull,llrest,lastobs, bprt,madr2,prednm,oldnfmt,oldcfmt,varindx,
        nd,start,counter, fpred,nout,oldtrap,nm,ncat,ylbl,xlbl,ycat1,x,
        ynum,dta,count, readdisk,ndtran,inm,xb,cv,wx,vtype,iswt,wlbl,
        windx, ky;
    local wnused,wt;
    /* Initialize */
    clear nused,readdisk,inm;
    oldnfmt = formatnv("");
    oldcfmt = formatcv("");
    open fin = ^dataset;
    if fin == -1;
        errmsg = "ERROR:  Can't open file " $+ dataset $+ ".";
        goto errout(error(1));
    endif;
    clear wnused;

    sttime = hsec;

    { nm, varindx } = indices(dataset, depvar|indvars);
    if scalerr(nm);
        goto errout(nm);
    endif;
    ylbl = nm[1];
    xlbl = "CONSTANT"|trimr(nm,1,0);
    nivar = rows(nm);
    if __weight $== 0;
        wlbl = "";
        windx = 0;
        iswt = 0;
    else;           /* INCLUDES __WEIGHT IF DEFINED */
        { wlbl, windx } = indices(dataset, __weight);
        if scalerr(wlbl);
            goto errout(wlbl);
        endif;
        iswt = 1;
        nm = nm|wlbl;
        varindx = varindx|windx;
    endif;

    { nm,vtype } = nametype(nm,__vtype);
    vtype = vtype[1];
    _qrycat = miss(0,0);

    { nr,start,counter,lastobs } = _rngchk(dataset,__range);
    range = lastobs - start + 1;
    ndtran = zeros(2,1);
    ndtran[1] = range;

    call seekr(fin,start);
    nd = 0;
    count = counter;
    do while count < lastobs;
        readdisk = readdisk+1;
        dta = readr(fin,nr);
        count = count + rows(dta);
        if count > lastobs;
            dta = trimr(dta,0,count-lastobs);
        endif;
        if __miss == 1;
            dta = packr(dta[.,varindx]);
        else;
            dta = dta[.,varindx];
        endif;
        nd = rows(dta);
        if nd == 0;
            continue;
        endif;
        ndtran[2] = ndtran[2] + nd;
        if iswt;
            wt = dta[.,cols(dta)];
        else;
            wt = ones(rows(dta),1);
        endif;
        wnused = wnused+sumc(wt);
        ynum = dta[.,1];
        if vtype;
            ynum = trunc(ynum);
        endif;
        if ismiss(ynum);
            errmsg = "ERROR:  Missing observations in dependent variable.";
            goto errout(error(70));
        endif;
        if scalmiss(_qrycat);
            _qrycat = unique(ynum,vtype);
        else;
            _qrycat = union(ynum,_qrycat,vtype);
        endif;
    endo;
    nused = ndtran[2];
    if nused == 0;
        errmsg = "ERROR:  No observations left after deleting missing values.";
        goto errout(error(77));
    endif;
    readdisk = readdisk>1;
    ncat = rows(_qrycat);
    if ncat le 1;
        errmsg = "ERROR:  Too few dependent categories.";
        goto errout(error(71));
    endif;
    ncatm1 = ncat-1;
    ycat1 = _qrycat[1:ncatm1]';
    obspred = zeros(ncat*ncat,1);
    if vtype == 0;
        _qrtmp = _qrycat;
    elseif rows(_qrcatnm) .ne ncat;
        _qrtmp = 0 $+ "Cat_"$+ftocv(_qrycat,1,0);
    else;
        _qrtmp = _qrcatnm;
        inm = 1;
    endif;
    nparms = nivar*ncatm1;          /* number of parameters */

    if (nparms*nparms) gt maxvec;
        errmsg = "ERROR:  Too many parameters. Increase the value of GAUSS "\
            "global variable maxvec then try again.";
        goto errout(error(32));
    endif;

    if readdisk;    /* only reset reader if data won't fit in memory  */
        minx = (1E+50)*ones(nivar,1);
        maxx = -minx;
        clear xx,xy,pct,meanx,count;
        call seekr(fin,start);
        count = counter;
        do while count < lastobs;
            dta = readr(fin,nr);
            count = count + rows(dta);
            if count > lastobs;
                dta = trimr(dta,0,count-lastobs);
            endif;
            if __miss == 1;
                dta = packr(dta[.,varindx]);
            else;
                dta = dta[.,varindx];
            endif;
            if scalmiss(dta);
                continue;
            endif;
            if iswt;
                wt = dta[.,cols(dta)];
                dta = trimr(dta',0,1)';
            else;
                wt = ones(rows(dta),1);
            endif;
            x = ones(rows(dta),1)~trimr(dta',1,0)';
            wx = x.*wt;
            if vtype;
                ynum = trunc(dta[.,1]) .== ycat1;
            else;
                ynum = dta[.,1] .$== ycat1;
            endif;
            xx = xx + wx'*x;
            xy = xy + wx'*ynum;
            pct = pct+sumc(wt.*ynum);
            meanx = meanx+sumc(wx);
            minx = minc((minx~minc(x))');
            maxx = maxc((maxx~maxc(x))');
        endo;
        clear dta;
    else;

        /* prepare data for logit analysis */
        ynum = dta[.,1];
        if vtype;
            ynum = trunc(ynum);
            ydum = ynum .== ycat1;
        else;
            ydum = ynum .$== ycat1;
        endif;
        x = ones(rows(dta),1)~trimr(dta',1,0)';
        if iswt;
            x = trimr(x',0,1)';
        endif;
        wx = wt.*x;
        clear dta;
        pct = sumc(wt.*ydum);
        xx = wx'*x;
        xy = wx'*ydum;
        meanx = sumc(wx);
        minx = minc(x);
        maxx = maxc(x);
    endif;
    if nused < nivar+1;
        errmsg = "ERROR:  Too few nonmissing observations.";
        goto errout(error(31));
    endif;
    pct = pct/wnused;       /* % in categories 1 to ncatm1 */

    pctlast = 1-sumc(pct);          /* % in category ncat */
    pctall = pct|pctlast;
    if minc(pctall) le .00000001;
        errmsg = "ERROR:  An outcome category has no observations.";
        goto errout(error(72));
    endif;
    meanx = meanx/wnused;           /* mean of indep vars */
    sd = sqrt((diag(xx)./(wnused))-(meanx.*meanx));
    if __output;
        call header("LOGIT",dataset,_qr_ver);
        print;
        print "CASES PROCESSED BY LOGIT:";
        print;
        print "  " ftos(nused,"%-*.*lf",1,0) " cases were kept out of "
            ftos(range,"%-*.*lf",1,0) " in file.";
        if iswt;
            print;
            print "WEIGHTING IS IN EFFECT:  ";
            print;
            print ("  Computations have used the weight variable:  " $+ wlbl);
        endif;
        print;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?