dstat.src
来自「没有说明」· SRC 代码 · 共 458 行 · 第 1/2 页
SRC
458 行
/*
** dstat.src - descriptive statistics
** (C) Copyright 1988-1997 by Aptech Systems, Inc.
** All Rights Reserved.
**
** This Software Product is PROPRIETARY SOURCE CODE OF APTECH
** SYSTEMS, INC. This File Header must accompany all files using
** any portion, in whole or in part, of this Source Code. In
** addition, the right to create such files is strictly limited by
** Section 2.A. of the GAUSS Applications License Agreement
** accompanying this Software Product.
**
** If you wish to distribute any portion of the proprietary Source
** Code, in whole or in part, you must first obtain written
** permission from Aptech Systems.
**
** Format Line
** ========================================================================
**
** { vnames,mean,var,std,min,
** max,valid,missing } = DSTAT(dataset,vars); 31
** { vnam,mean,var,std,min,
** max,valid,mis } = _DSTATD(dataset,vars); 205
** { vnam,mean,var,std,min,
** max,valid,mis } = _DSTATX(x); 354
**/
#include gauss.ext
/*
**> dstat
**
** Purpose: Computes descriptive statistics.
**
** Format: { vnam,mean,var,std,min,max,valid,mis } = dstat(dataset,vars);
**
** Input: dataset string, name of data set.
**
** If <dataset> is a null string or 0, <vars> will
** be assumed to be a matrix containing the data.
**
** vars Kx1 character vector, names of the variables in
** <dataset> to be analyzed
** OR
** Kx1 numeric vector, the indices of the variables
** in <dataset> to be analyzed.
**
** These can be any size subset of the variables in
** the data set, and can be in any order. If a
** scalar 0 is passed, all columns of the data set
** will be used.
**
** If <dataset> is a null string, or 0, then <vars>
** will be interpreted as:
**
** NxM matrix, the data to be analyzed.
**
** Globals:
** __altnam Mx1 matrix, character vector of alternate names
** to be used if a matrix in memory is being analyzed.
** (i.e. <dataset> is a null string, or 0).
**
** By default, the variable names will be X1,X2,...,Xm
**
** __miss scalar. Determines how missing values will be
** handled.
**
** 0 there are no missing values.
**
** 1 listwise deletion, drop an observation
** if any missings occur in it.
**
** 2 pairwise deletion.
**
** Default = 0.
**
** __row scalar, the number of rows of <dataset> to be read
** per iteration of the read loop.
**
** By default the number of rows to be read will be
** calculated internally.
**
** __output scalar, controls output.
**
** 1 print output table.
**
** 0 do not print output.
**
** Default = 1.
**
** __vtype Mx1 vector, or scalar, of 0's and 1's used to
** indicate whether the columns of <dataset> are
** character or numeric. Set this global only if you
** are not following the upper/lower case convention.
**
** If you are not following this convention, and have
** mixed character-numeric data, then set __vtype as
** follows: If __vtype is a vector, each element
** should be set to 0 if the corresponding column in
** <dataset> is character, and to 1 if the
** corresponding column in <dataset> is numeric. If
** __vtype is a scalar, it is assumed that <dataset>
** is either all character (__vtype = 0) or all numeric
** (__vtype = 1).
**
** By default, __vtype = -1. That is, if data to be
** analyzed is passed in a data set to dstat (<dataset>
** is the name of a data set), then data type will
** be determined by looking at each variable name,
** following the upper/lower case convention.
**
** If a matrix in memory is passed to dstat, then
** either __vtype or __altnam (using the upper/lower
** case convention) will be used to determine the
** case. If __vtype = -1, and __altnam is not set,
** then it will be assumed that all data is numeric.
**
** Output: vnames Kx1 character vector, the names of the variables
** used in the statistics.
**
** mean Kx1 vector, means.
**
** var Kx1 vector, variance.
**
** std Kx1 vector, standard deviation.
**
** min Kx1 vector, minima.
**
** max Kx1 vector, maxima.
**
** valid Kx1 vector, the number of valid cases.
**
** missing Kx1 vector, the number of missing cases.
**
** Remarks: If pairwise deletion is used, the minima and maxima will be
** the true values for the valid data. The means and standard
** deviations will be computed using the correct number of
** valid observations for each variable.
**
**
** dstat
**
*/
external proc indices;
/*
** dstat - entry point for descriptive statistics.
**
** Dispatches to _dstatx when <dataset> is a null string (data passed as a
** matrix in <vars>) and to _dstatd otherwise, optionally prints a summary
** table (controlled by the global __output), and returns the eight result
** vectors.
**
** Returns: vnames, mean, var, std, min, max, valid, missing.
**          Eight scalar zeros are returned when the helper reports failure
**          (vnames equal to -99, or <missing> holding a scalar error code).
**
** Side effects: the missing-value print symbol is switched to "-----" while
**          the table is produced and is set back to "." before the proc
**          exits, including the path where printfm fails.
*/
proc (8) = dstat(dataset,vars);
    local n,omat,vnames,mean,var,std,min,max,valid,missing,fmt,mask;

    /* Force <dataset> into string form so the null-string test below applies. */
    dataset = "" $+ dataset;

    /* A <vars> argument of type code 13 is converted to numeric with stof. */
    if type(vars) == 13;
        vars = stof(vars);
    endif;

    /* check for complex input */
    if iscplx(vars);
        if hasimag(vars);
            errorlog "ERROR: Matrix must be real.";
            end;
        else;
            /* complex storage with no imaginary part: keep the real part */
            vars = real(vars);
        endif;
    endif;

    /* Null-string dataset: <vars> is the data matrix; otherwise read the data set. */
    if dataset $== "";
        { vnames, mean,var,std,min,max,valid,missing } = _dstatx(vars);
    else;
        { vnames,mean,var,std,min,max,valid,missing } = _dstatd(dataset,vars);
    endif;

    /* Failure reported by the helper: hand back eight zeros. */
    if vnames $== -99 or (scalerr(missing));
        retp(0,0,0,0,0,0,0,0);
    endif;

    /* Missing values in the printed table are shown as "-----". */
    msym "-----";

    n = rows(mean);

    /* Expand scalar counts so every variable has its own row in the table. */
    if rows(missing) == 1;
        missing = missing*ones(n,1);
    endif;
    if rows(valid) == 1;
        valid = valid*ones(n,1);
    endif;

    if __output;
        print;
        print chrs(45*ones(79,1));
        print "Variable Mean Std Dev Variance Minimum Maximum "\
            " Valid Missing" ;
        print chrs(45*ones(79,1));

        omat = vnames~mean~std~var~min~max~valid~missing;
        mask = 0~1~1~1~1~1~1~1; /* mask to print first column as strings */

        let fmt[8,3]=
            "-*.*s " 8 8 /* format info for variable names column */
            "*.*lf" 10 4 /* format info for mean column */
            "*.*lf" 10 4 /* format info for std deviation column */
            "*.*lf" 12 4 /* format info for variance column */
            "*.*lf" 10 4 /* format info for min column */
            "*.*lf" 10 4 /* format info for max column */
            "*.*lf" 10 0 /* format info for valid column */
            "*.*lf" 5 0 /* format info for missing column */;

        if not printfm(omat,mask,fmt);
            /*
            ** Put the missing-value symbol back before terminating, so the
            ** "-----" setting does not leak into later output.
            */
            msym ".";
            errorlog "ERROR: Can't write table";
            end;
        endif;
    endif;

    /* Normal exit: restore the missing-value symbol. */
    msym ".";
    retp(vnames,mean,var,std,min,max,valid,missing);
endp;
/*
**> _dstatd
**
** Purpose: This is used if the data is passed in a dataset.
**
** Format: { vnames,mean,var,std,min,max,valid,missing } =
** _dstatd(dataset,vars);
**
** Input: dataset string, the name of the dataset to be analyzed.
**
** vars Kx1 character vector, names of the variables.
** OR
** Kx1 numeric vector, indices of variables.
**
** Output: vnam Kx1 character vector, the names of the variables
** used in the statistics.
**
** mean Kx1 vector, means.
**
** var Kx1 vector, variance.
**
** std Kx1 vector, standard deviation.
**
** min Kx1 vector, minima.
**
** max Kx1 vector, maxima.
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?