dstat.src

来自「没有说明」· SRC 代码 · 共 458 行 · 第 1/2 页

SRC
458
字号
/*
** dstat.src - descriptive statistics
** (C) Copyright 1988-1997 by Aptech Systems, Inc.
** All Rights Reserved.
**
** This Software Product is PROPRIETARY SOURCE CODE OF APTECH
** SYSTEMS, INC.    This File Header must accompany all files using
** any portion, in whole or in part, of this Source Code.   In
** addition, the right to create such files is strictly limited by
** Section 2.A. of the GAUSS Applications License Agreement
** accompanying this Software Product.
**
** If you wish to distribute any portion of the proprietary Source
** Code, in whole or in part, you must first obtain written
** permission from Aptech Systems.
**
**                  Format                                  Line
** ========================================================================
**
**      { vnames,mean,var,std,min,
**        max,valid,missing } = DSTAT(dataset,vars);         31
**      { vnam,mean,var,std,min,
**        max,valid,mis }     = _DSTATD(dataset,vars);      205
**      { vnam,mean,var,std,min,
**        max,valid,mis }     = _DSTATX(x);                 354
**/

#include gauss.ext

/*
**> dstat
**
**  Purpose:    Computes descriptive statistics.
**
**  Format:     { vnam,mean,var,std,min,max,valid,mis } = dstat(dataset,vars);
**
**  Input:      dataset    string, name of data set.
**
**                         If <dataset> is a null string or 0, <vars> will
**                         be assumed to be a matrix containing the data.
**
**              vars       Kx1 character vector, names of the variables in
**                         <dataset> to be analyzed
**                                         OR
**                         Kx1 numeric vector, the indices of the variables
**                         in <dataset> to be analyzed.
**
**                         These can be any size subset of the variables in
**                         the data set, and can be in any order.  If a
**                         scalar 0 is passed, all columns of the data set
**                         will be used.
**
**                         If <dataset> is a null string, or 0, then <vars>
**                         will be interpreted as:
**
**                         NxM matrix, the data to be analyzed.
**
**  Globals:
**            __altnam      Mx1 matrix, character vector of alternate names
**                          to be used if a matrix in memory is being analyzed.
**                          (i.e. <dataset> is a null string, or 0).
**
**                          By default, the variable names will be X1,X2,..Xm
**
**            __miss        scalar.  Determines how missing values will be
**                          handled.
**
**                                 0   there are no missing values.
**
**                                 1   listwise deletion, drop an observation
**                                     if any missings occur in it.
**
**                                 2   pairwise deletion.
**
**                          Default = 0.
**
**            __row         scalar, the number of rows of <dataset> to be read
**                          per iteration of the read loop.
**
**                          By default the number of rows to be read will be
**                          calculated internally.
**
**           __output       scalar, controls output.
**
**                              1   print output table.
**
**                              0   do not print output.
**
**                          Default = 1.
**
**          __vtype         Mx1 vector,  or scalar, of 0's and 1's used to
**                          indicate whether the columns of <dataset> are
**                          character or numeric.  Set this global only if you
**                          are not following the upper/lower case convention.
**
**                          If you are not following this convention, and have
**                          mixed character-numeric data, then set __vtype as
**                          follows:  If __vtype is a vector, each element
**                          should be set to 0 if the corresponding column in
**                          <dataset> is character, and to 1 if the
**                          corresponding column in <dataset> is numeric.  If
**                          __vtype is a scalar, it is assumed that <dataset>
**                          is either all character (__vtype = 0) or all numeric
**                          (__vtype = 1).
**
**                          By default, __vtype = -1.  That is, if data to be
**                          analyzed is passed in a data set to dstat (<dataset>
**                          is the name of a data set), then data type will
**                          be determined by looking at each variable name,
**                          following the upper/lower case convention.
**
**                          If a matrix in memory is passed to dstat, then
**                          either __vtype or __altnam (using the upper/lower
**                          case convention) will be used to determine the
**                          case. If __vtype = -1, and __altnam is not set,
**                          then it will be assumed that all data is numeric.
**
**  Output:     vnames      Kx1 character vector, the names of the variables
**                          used in the statistics.
**
**               mean       Kx1 vector, means.
**
**               var        Kx1 vector, variance.
**
**               std        Kx1 vector, standard deviation.
**
**               min        Kx1 vector, minima.
**
**               max        Kx1 vector, maxima.
**
**               valid      Kx1 vector, the number of valid cases.
**
**               missing    Kx1 vector, the number of missing cases.
**
**  Remarks:    If pairwise deletion is used, the minima and maxima will be
**              the true values for the valid data.  The means and standard
**              deviations will be computed using the correct number of
**              valid observations for each variable.
**
**
**  dstat
**
*/

external proc indices;

/*
** dstat -- front end for the descriptive-statistics routines.
**
** Dispatches to _dstatx (data passed as a matrix in <vars>) when <dataset>
** is empty, otherwise to _dstatd (data read from the named data set), then
** optionally prints a summary table controlled by the global __output.
**
** Returns eight values: vnames, mean, var, std, min, max, valid, missing.
** Returns eight scalar zeros when the helper reports failure (vnames equal
** to -99, or <missing> holding a scalar error code).
**
** Terminates the program with `end` if <vars> has a non-zero imaginary part
** or if the table cannot be written by printfm.
*/
proc (8) = dstat(dataset,vars);
    local n,omat,vnames,mean,var,std,min,max,valid,missing,fmt,mask;

    /* Concatenate onto a null string so the comparison below is string-based;
    ** presumably this is what lets a scalar 0 act as "no data set". */
    dataset = "" $+ dataset;

    /* type code 13 is assumed to denote a string (per GAUSS `type`);
    ** convert it to a matrix */
    if type(vars) == 13;
         vars = stof(vars);
    endif;

    /* check for complex input */
    if iscplx(vars);
        if hasimag(vars);
            errorlog "ERROR: Matrix must be real.";
            end;
        else;
            /* complex storage but no imaginary content: keep the real part */
            vars = real(vars);
        endif;
    endif;

    /* no data set name: <vars> is the data itself */
    if dataset $== "";
        { vnames, mean,var,std,min,max,valid,missing } = _dstatx(vars);
    else;
        { vnames,mean,var,std,min,max,valid,missing } = _dstatd(dataset,vars);
    endif;

    /* failure reported by the helper; checked before msym is changed so
    ** nothing needs restoring on this path */
    if vnames $== -99 or (scalerr(missing));
        retp(0,0,0,0,0,0,0,0);
    endif;

    /* symbol used when printing missing values in the table */
    msym "-----";

    n = rows(mean);

    /* counts may come back as scalars; expand them to one entry per variable */
    if rows(missing) == 1;
        missing = missing*ones(n,1);
    endif;
    if rows(valid) == 1;
        valid = valid*ones(n,1);
    endif;

    if __output;
        print;
        print chrs(45*ones(79,1));      /* rule made of 79 '-' characters */
        print "Variable       Mean   Std Dev    Variance   Minimum   Maximum  "\
            "   Valid Missing" ;
        print chrs(45*ones(79,1));
        omat = vnames~mean~std~var~min~max~valid~missing;
        mask = 0~1~1~1~1~1~1~1;     /* mask to print first column as strings */
        let fmt[8,3]=
            "-*.*s "  8 8  /* format info for variable names column */
            "*.*lf" 10 4   /* format info for mean column           */
            "*.*lf" 10 4   /* format info for std deviation column  */
            "*.*lf" 12 4   /* format info for variance column       */
            "*.*lf" 10 4   /* format info for min column            */
            "*.*lf" 10 4   /* format info for max column            */
            "*.*lf" 10 0   /* format info for valid column          */
            "*.*lf" 5 0    /* format info for missing column        */;
        if not printfm(omat,mask,fmt);
            /* restore the missing-value symbol before aborting, so the
            ** "-----" setting does not persist after the program ends */
            msym ".";
            errorlog "ERROR: Can't write table";
            end;
        endif;
    endif;

    /* restore the missing-value symbol to "." */
    msym ".";
    retp(vnames,mean,var,std,min,max,valid,missing);
endp;

/*
**> _dstatd
**
**  Purpose:  This is used if the data is passed in a dataset.
**
**  Format:  { vnames,mean,var,std,min,max,valid,missing } =
**                                   _dstatd(dataset,vars);
**
**  Input:      dataset   string, the name of the dataset to be analyzed.
**
**              vars      Kx1 character vector, names of the variables.
**                                       OR
**                        Kx1 numeric vector, indices of variables.
**
**  Output:     vnam      Kx1 character vector, the names of the variables
**                        used in the statistics.
**
**              mean      Kx1 vector, means.
**
**              var       Kx1 vector, variance.
**
**              std       Kx1 vector, standard deviation.
**
**              min       Kx1 vector, minima.
**
**              max       Kx1 vector, maxima.

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?