ols.src
来自「没有说明」· SRC 代码 · 共 689 行 · 第 1/2 页
SRC
689 行
/*
** ols.src - Least Squares Regression
** (C) Copyright 1988-1998 by Aptech Systems, Inc.
** All Rights Reserved.
**
** This Software Product is PROPRIETARY SOURCE CODE OF APTECH
** SYSTEMS, INC. This File Header must accompany all files using
** any portion, in whole or in part, of this Source Code. In
** addition, the right to create such files is strictly limited by
** Section 2.A. of the GAUSS Applications License Agreement
** accompanying this Software Product.
**
** If you wish to distribute any portion of the proprietary Source
** Code, in whole or in part, you must first obtain written
** permission from Aptech Systems.
**
** { vnam,m,b,stb,vc,stderr,sigma,cx,rsq,resid,dwstat } =
** OLS(dataset,depvar,indvars);
**
**> ols
**
** Purpose: To compute least squares regression.
**
** Format: { vnam,m,b,stb,vc,stderr,sigma,cx,rsq,resid,dwstat } =
** ols(dataset,depvar,indvars);
**
** Input: dataset string, name of data set.
**
** If this is a null string, the procedure assumes
** that the actual data has been passed in the
** next two arguments.
**
** depvar dependent variable.
**
** If dataset contains the name of a data set, this is
** interpreted as:
**
** string, name of dependent variable
** or
** scalar, index of dependent variable. If scalar 0,
** the last column of the data set will be used.
**
** If dataset is a null string or 0, this is
** interpreted as:
**
** Nx1 vector, the dependent variable
**
** indvars independent variables.
**
** If dataset contains the name of a data set, this is
** interpreted as:
**
** Kx1 character vector, names of independent
** variables
** or
** Kx1 numeric vector, indices of independent
** variables
**
** These can be any size subset of the variables in
** the data set, and can be in any order. If a
** scalar 0 is passed, all columns of the data set
** will be used except for the one used for the
** dependent variable.
**
** If dataset is a null string or 0, this is
** interpreted as:
**
** NxK matrix, the independent variables
**
** Defaults are provided for the following global input
** variables so they can be ignored unless you need control
** over the other options provided by this procedure.
**
** __altnam global vector, default 0.
**
** This can be a (K+2)x1 or (K+1)x1 character
** vector of alternate variable names for the
** output. If __con is 1, this must be (K+2)x1.
** The dependent variable is the last element.
** This has an effect only if the data are passed
** in as matrices.
**
** __con global scalar, default 1.
**
** 1 a constant term will be added, D = K+1.
**
** 0 no constant term will be added, D = K.
**
** A constant term is always used in
** constructing the moment matrix m.
**
** __miss global scalar, default 0.
**
** 0 there are no missing values (fastest).
**
** 1 listwise deletion, drop any cases in
** which missings occur.
**
** 2 pairwise deletion, this is equivalent to
** setting missings to 0 when calculating m.
** The number of cases computed is equal to
** the total number of cases in the data set.
**
** __row global scalar, the number of rows to read per
** iteration of the read loop. Default 0.
**
** If 0, the number of rows will be calculated
** internally. If you get an "Insufficient memory"
** error while executing OLS you can supply a value
** for __row that works on your system.
**
** The answers may vary slightly due to rounding
** error differences when a different number of
** rows is read per iteration. You can use __row
** to control this if you want to get exactly the
** same rounding effects between several runs.
**
** __output global scalar, default 1.
**
** 1 print the statistics.
**
** 0 do not print statistics.
**
** _olsres global scalar, default 0.
**
** 1 compute residuals (resid) and
** Durbin-Watson statistic (dwstat).
**
** 0 resid = 0, dwstat = 0.
**
** Output: vnam (K+2)x1 or (K+1)x1 character vector, the variable
** names used in the regression. If a constant term is
** used this vector will be (K+2)x1, and the first
** name will be "CONSTANT". The last name will be the
** name of the dependent variable.
**
** m MxM matrix, where M = K+2, the moment matrix
** constructed by calculating x'x where x is a matrix
** containing all useable observations and having columns
** in the order:
**
** constant ~ indvars ~ depvar
** ------------------------------------------------------
** (1.0) ~ (independent variables) ~ (dependent variable)
**
** A constant term is always used in computing m, even if
** __CON = 0.
**
** b Dx1 vector, the least squares estimates of parameters.
**
** Error handling is controlled by the low order bit
** of the trap flag.
**
** TRAP 0 terminate with error message
**
** TRAP 1 return scalar error code in b
**
** 30 system singular
** 31 system underdetermined
** 32 same number of columns as rows
** 33 too many missings
** 34 file not found
** 35 no variance in an independent variable
**
** The system can become underdetermined if you use listwise
** deletion and have missing values. In that case it is
** possible to skip so many cases that there are fewer
** useable rows than columns in the data set.
**
** stb Kx1 vector, the standardized coefficients.
**
** vc DxD matrix, the variance-covariance matrix of
** estimates.
**
** stderr Dx1 vector, the standard errors of the estimated
** parameters.
**
** sigma scalar, standard deviation of residual.
**
** cx (K+1)x(K+1) matrix, correlation matrix of variables
** in the order:
**
** independent variables ~ dependent variable
**
** rsq scalar, R square, coefficient of determination.
**
** resid residuals, resid = y - x * b
**
** If _olsres = 1, the residuals will be computed.
**
** If the data is taken from a data set, a new data
** set will be created for the residuals, using the
** name in the global string variable _olsrnam. The
** residuals will be saved in this data set as an
** Nx1 column. The resid return value will be a
** string containing the name of the new data set
** containing the residuals.
**
** If the data is passed in as a matrix, the
** resid return value will be the Nx1 vector of
** residuals.
**
** dwstat scalar, Durbin-Watson statistic.
**
** Remarks: No output file is modified, opened, or closed by this
** procedure. If you want output to be placed in a file
** you need to open an output file before calling ols.
** If a column of constant value has been included among
** the independent variables, this variable will be
** deleted.
**
** Examples: y = { 2,
** 3,
** 1,
** 7,
** 5 };
**
** x = { 1 3 2,
** 2 3 1,
** 7 1 7,
** 5 3 1,
** 3 5 5 };
**
** output file = ols.out reset;
** call ols(0,y,x);
** output off;
**
** In this example, the output from ols was put into a
** file called ols.out as well as being printed on the
** screen. This example will compute a least squares
** regression of y on x. The returned values were
** discarded by using a call statement.
**
** data = "olsdat";
** depvar = { score };
** indvars = { region,age,marstat };
** _olsres = 1;
** output file = lpt1 on;
** { nam,m,b,stb,vc,std,sig,cx,rsq,resid,dbw } =
** ols(data,depvar,indvars);
** output off;
**
** In this example the data set olsdat.dat was used
** to compute a regression. The dependent variable
** is "score". The independent variables are:
** "region", "age", and "marstat". The residuals
** and Durbin-Watson statistic will be computed.
** The output will be sent to the printer as well as
** the screen and the returned values are assigned
** to variables.
**
** Globals: __altnam, __output, __row, __miss, __con, _olsres, _olsrnam,
** indices2(), maxvec(), indexcat(), dotfeq()
**
** See Also: olsqr
*/
/*
** External procedure declarations used by ols.
**
** In the #ifDLLCALL branch only indices2 is declared external; in the
** #else branch indexcat is declared external as well.
**
** NOTE(review): presumably indexcat is supplied by the runtime itself when
** the DLLCALL condition holds, so no external declaration is needed
** there -- confirm against the GAUSS documentation.
*/
#ifDLLCALL
external proc indices2;
#else
external proc indices2,indexcat;
#endif
proc (11) = ols(dataset,depvar,indvars);
local const,idat,fin,tobs,depindx,indindx,nvar,nr,k,dta,y0,mn,nc,cy,
i,constflg,constvlu,vardx,std,vnames,cxx,cxxi,cxy,cyy,df,
sse,nobs,mobs,be,b,vc,stderr,t,rsq,rbsq,fstat,pvf,pvt,ms,
omat,mask,fmt,dwstat,mss,prcn,nvar1,cvec,old,u2,m,cor,constant,
stdb,fout,u,str,tv,oldtrp,u0,cov,stdest,dd,cnstname;
clear constflg,mobs,constant;
const = __con;
constvlu = 1;
mss = { . };
fin = -1;
fout = -1;
dataset = "" $+ dataset;
if dataset $== "";
if rows(indvars) /= rows(depvar);
errorlog "ERROR: Matrices X and Y have different numbers of rows";
end;
endif;
dta = indvars~depvar;
idat = 1;
tobs = rows(dta);
nobs = tobs;
nvar1 = cols(dta);
nvar = nvar1-1;
vardx = seqa(1,1,nvar1);
indvars = 0$+"X"$+ftocv(seqa(1,1,nvar),__vpad*(floor(log(nvar))+1),0);
depvar = 0$+"Y";
else;
idat = 0;
/* open file using name in variable DATASET */
open fin = ^dataset;
if fin == -1;
goto errout(34);
endif;
tobs = rowsf(fin);
{ depvar,depindx,indvars,indindx } = indices2(dataset,depvar,indvars);
vardx = indindx|depindx;
nobs = tobs;
nvar = rows(indindx);
nvar1 = nvar+1;
/* Computation of max number of rows to read at one time */
if __row;
nr = __row;
else;
k = colsf(fin);
nr = floor(minc(coreleft/(k*8*3.5)|maxvec/(k+1)));
endif;
endif;
if __miss == 2;
old = ndpcntrl(0,0);
call ndpcntrl(1,1);
clear mn,nc,m,i,nobs;
constflg = ones(1,nvar1);
do until i == tobs;
i = i+1;
if idat;
y0 = dta[i,.];
else;
y0 = readr(fin,1);
y0 = y0[vardx];
endif;
cy = (y0 .> 0 .or y0 .< 1);
ndpclex;
y0 = missrv(y0,0);
m = m+y0'*y0;
mn = mn + y0'*cy;
nc = nc+cy'*cy;
nobs = nobs+(cy /= 0);
endo;
call ndpcntrl(old,0xffff);
if nc == 0;
goto errout(31);
endif;
mobs = tobs-nobs;
mn = mn./nc;
m = m./nc;
elseif idat;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?