ols.src

来自「没有说明」· SRC 代码 · 共 689 行 · 第 1/2 页

SRC
689
字号
/*
** ols.src - Least Squares Regression
** (C) Copyright 1988-1998 by Aptech Systems, Inc.
** All Rights Reserved.
**
** This Software Product is PROPRIETARY SOURCE CODE OF APTECH
** SYSTEMS, INC.    This File Header must accompany all files using
** any portion, in whole or in part, of this Source Code.   In
** addition, the right to create such files is strictly limited by
** Section 2.A. of the GAUSS Applications License Agreement
** accompanying this Software Product.
**
** If you wish to distribute any portion of the proprietary Source
** Code, in whole or in part, you must first obtain written
** permission from Aptech Systems.
**
** { vnam,m,b,stb,vc,stderr,sigma,cx,rsq,resid,dwstat } =
**                                              OLS(dataset,depvar,indvars);
**
**> ols
**
**  Purpose:    To compute least squares regression.
**
**  Format:     { vnam,m,b,stb,vc,stderr,sigma,cx,rsq,resid,dwstat } =
**                        ols(dataset,depvar,indvars);
**
**  Input:  dataset     string, name of data set.
**
**                         If this is a null string or 0, the procedure
**                         assumes that the actual data has been passed
**                         in the next two arguments.
**
**          depvar      dependent variable.
**
**                        If dataset contains the name of a data set, this is
**                        interpreted as:
**
**                            string, name of dependent variable
**                                or
**                            scalar, index of dependent variable. If scalar 0,
**                            the last column of the data set will be used.
**
**                        If dataset is a null string or 0, this is
**                        interpreted as:
**
**                            Nx1 vector, the dependent variable
**
**          indvars      independent variables.
**
**                         If dataset contains the name of a data set, this is
**                         interpreted as:
**
**                             Kx1 character vector, names of independent
**                             variables
**                                  or
**                             Kx1 numeric vector, indices of independent
**                             variables
**
**                             These can be any size subset of the variables in
**                             the data set, and can be in any order.  If a
**                             scalar 0 is passed, all columns of the data set
**                             will be used except for the one used for the
**                             dependent variable.
**
**                         If dataset is a null string or 0, this is
**                         interpreted as:
**
**                             NxK matrix, the independent variables
**
**              Defaults are provided for the following global input
**              variables so they can be ignored unless you need control
**              over the other options provided by this procedure.
**
**          __altnam     global vector, default 0.
**
**                          This can be a (K+2)x1 or (K+1)x1 character
**                          vector of alternate variable names for the
**                          output.  If __con is 1, this must be (K+2)x1.
**                          The dependent variable is the last element.
**                          This has an effect only if the data are passed
**                          in as matrices.
**
**          __con        global scalar, default 1.
**
**                          1   a constant term will be added, D = K+1.
**
**                          0   no constant term will be added, D = K.
**
**                          A constant term is always used in
**                          constructing the moment matrix m.
**
**          __miss       global scalar, default 0.
**
**                          0   there are no missing values (fastest).
**
**                          1   listwise deletion, drop any cases in
**                              which missings occur.
**
**                          2   pairwise deletion, this is equivalent to
**                              setting missings to 0 when calculating m.
**                              The number of cases computed is equal to
**                              the total number of cases in the data set.
**
**          __row        global scalar, the number of rows to read per
**                               iteration of the read loop. Default 0.
**
**                              If 0, the number of rows will be calculated
**                              internally.  If you get an "Insufficient memory"
**                              error while executing OLS you can supply a value
**                              for __row that works on your system.
**
**                              The answers may vary slightly due to rounding
**                              error differences when a different number of
**                              rows is read per iteration.  You can use __row
**                              to control this if you want to get exactly the
**                              same rounding effects between several runs.
**
**          __output      global scalar, default 1.
**
**                          1   print the statistics.
**
**                          0   do not print statistics.
**
**          _olsres       global scalar, default 0.
**
**                          1   compute residuals (resid) and
**                              Durbin-Watson statistic (dwstat).
**
**                          0   resid = 0, dwstat = 0.
**
**  Output:   vnam       (K+2)x1 or (K+1)x1 character vector, the variable
**                       names used in the regression.  If a constant term is
**                       used this vector will be (K+2)x1, and the first
**                       name will be "CONSTANT".  The last name will be the
**                       name of the dependent variable.
**
**            m          MxM matrix, where M = K+2, the moment matrix
**                       constructed by calculating x'x where x is a matrix
**                       containing all useable observations and having columns
**                       in the order:
**
**                   constant   ~     indvars        ~        depvar
**                   ------------------------------------------------------
**                    (1.0) ~ (independent variables) ~ (dependent variable)
**
**                   A constant term is always used in computing m, even if
**                   __CON = 0.
**
**            b          Dx1 vector, the least squares estimates of parameters.
**
**                     Error handling is controlled by the low order bit
**                     of the trap flag.
**
**                         TRAP 0     terminate with error message
**
**                         TRAP 1     return scalar error code in b
**
**                                30  system singular
**                                31  system underdetermined
**                                32  same number of columns as rows
**                                33  too many missings
**                                34  file not found
**                                35  no variance in an independent variable
**
**                     The system can become underdetermined if you use listwise
**                     deletion and have missing values.  In that case it is
**                     possible to skip so many cases that there are fewer
**                     useable rows than columns in the data set.
**
**            stb        Kx1 vector, the standardized coefficients.
**
**            vc         DxD matrix, the variance-covariance matrix of
**                       estimates.
**
**            stderr     Dx1 vector, the standard errors of the estimated
**                           parameters.
**
**            sigma      scalar, standard deviation of residual.
**
**            cx         (K+1)x(K+1) matrix, correlation matrix of variables
**                       in the order:
**
**                           independent variables ~ dependent variable
**
**            rsq      scalar, R square, coefficient of determination.
**
**            resid    residuals, resid = y - x * b
**
**                       If _olsres = 1, the residuals will be computed.
**
**                       If the data is taken from a data set, a new data
**                       set will be created for the residuals, using the
**                       name in the global string variable _olsrnam.  The
**                       residuals will be saved in this data set as an
**                       Nx1 column.  The resid return value will be a
**                       string containing the name of the new data set
**                       containing the residuals.
**
**                       If the data is passed in as a matrix, the
**                       resid return value will be the Nx1 vector of
**                       residuals.
**
**            dwstat     scalar, Durbin-Watson statistic.
**
**  Remarks:    No output file is modified, opened, or closed by this
**              procedure.  If you want output to be placed in a file
**              you need to open an output file before calling ols.
**              If a column of constant value has been included among
**              the independent variables, this variable will be
**              deleted.
**
**  Examples:   y = { 2,
**                    3,
**                    1,
**                    7,
**                    5 };
**
**              x = { 1 3 2,
**                    2 3 1,
**                    7 1 7,
**                    5 3 1,
**                    3 5 5 };
**
**              output file = ols.out reset;
**              call ols(0,y,x);
**              output off;
**
**              In this example, the output from ols was put into a
**              file called ols.out as well as being printed on the
**              screen.  This example computes a least squares
**              regression of y on x.  The returned values were
**              discarded by using a call statement.
**
**              data = "olsdat";
**              depvar = { score };
**              indvars = { region,age,marstat };
**              _olsres = 1;
**              output file = lpt1 on;
**              { nam,m,b,stb,vc,std,sig,cx,rsq,resid,dbw } =
**                  ols(data,depvar,indvars);
**              output off;
**
**              In this example, the data set olsdat.dat was used
**              to compute a regression. The dependent variable
**              is "score". The independent variables are:
**              "region", "age", and "marstat".  The residuals
**              and Durbin-Watson statistic will be computed.
**              The output will be sent to the printer as well as
**              the screen and the returned values are assigned
**              to variables.
**
**  Globals:    __altnam, __output, __row, __miss, __con, _olsres, _olsrnam,
**              indices2(), maxvec(), indexcat(), dotfeq()
**
**  See Also:   olsqr
*/

/*
** External procedure declarations for this file.
**
** indices2 is declared external in both branches of the conditional;
** indexcat is declared external only in the #else branch.
**
** NOTE(review): presumably #ifDLLCALL selects the first branch when the
** DLLCALL facility is available in the running GAUSS -- confirm against
** the GAUSS compiler-directive documentation.
*/
#ifDLLCALL
external proc indices2;
#else
external proc indices2,indexcat;
#endif

proc (11) = ols(dataset,depvar,indvars);

    local const,idat,fin,tobs,depindx,indindx,nvar,nr,k,dta,y0,mn,nc,cy,
        i,constflg,constvlu,vardx,std,vnames,cxx,cxxi,cxy,cyy,df,
        sse,nobs,mobs,be,b,vc,stderr,t,rsq,rbsq,fstat,pvf,pvt,ms,
        omat,mask,fmt,dwstat,mss,prcn,nvar1,cvec,old,u2,m,cor,constant,
        stdb,fout,u,str,tv,oldtrp,u0,cov,stdest,dd,cnstname;

    clear constflg,mobs,constant;
    const = __con;
    constvlu = 1;
    mss = { . };
    fin = -1;
    fout = -1;

    dataset = "" $+ dataset;
    if dataset $== "";
        if rows(indvars) /= rows(depvar);
            errorlog "ERROR: Matrices X and Y have different numbers of rows";
            end;
        endif;
        dta = indvars~depvar;
        idat = 1;
        tobs = rows(dta);
        nobs = tobs;
        nvar1 = cols(dta);
        nvar = nvar1-1;
        vardx = seqa(1,1,nvar1);
        indvars = 0$+"X"$+ftocv(seqa(1,1,nvar),__vpad*(floor(log(nvar))+1),0);
        depvar = 0$+"Y";
    else;
        idat = 0;
    /* open file using name in variable DATASET */
        open fin = ^dataset;
        if fin == -1;
            goto errout(34);
        endif;
        tobs = rowsf(fin);
        { depvar,depindx,indvars,indindx } = indices2(dataset,depvar,indvars);
        vardx = indindx|depindx;
        nobs = tobs;
        nvar = rows(indindx);
        nvar1 = nvar+1;

    /* Computation of max number of rows to read at one time */
        if __row;
            nr = __row;
        else;
            k = colsf(fin);
            nr = floor(minc(coreleft/(k*8*3.5)|maxvec/(k+1)));
        endif;
    endif;

    if __miss == 2;
        old = ndpcntrl(0,0);
        call ndpcntrl(1,1);
        clear mn,nc,m,i,nobs;
        constflg = ones(1,nvar1);
        do until i == tobs;
            i = i+1;
            if idat;
                y0 = dta[i,.];
            else;
                y0 = readr(fin,1);
                y0 = y0[vardx];
            endif;
            cy = (y0 .> 0 .or y0 .< 1);
            ndpclex;
            y0 = missrv(y0,0);
            m = m+y0'*y0;
            mn = mn + y0'*cy;
            nc = nc+cy'*cy;
            nobs = nobs+(cy /= 0);
        endo;
        call ndpcntrl(old,0xffff);
        if nc == 0;
            goto errout(31);
        endif;
        mobs = tobs-nobs;
        mn = mn./nc;
        m = m./nc;
    elseif idat;

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?