dstat.src
来自「没有说明」· SRC 代码 · 共 458 行 · 第 1/2 页
SRC
458 行
**
** valid Kx1 vector, the number of valid cases.
**
** mis Kx1 vector, the number of missing cases.
**
*/
proc (8) = _dstatd(dataset,vars);
local i,fin,vnames,indx,kk,k,nr,tobs,valid,var,mean,std,max,min,missing,
sum,sumsq,z,fac,old,vtype,vlbl;
old = ndpcntrl(0,0);
call ndpcntrl(1,1);
/* open file using name in variable DATASET */
dataset = "" $+ dataset;
open fin = ^dataset;
if fin == -1;
if not trapchk(1);
errorlog "Can't open file: " $+ dataset;
end;
else;
retp(0,0,0,0,0,0,0,error(99));
endif;
endif;
if iscplxf(fin);
errorlog "ERROR: Not implemented for complex data sets.";
end;
endif;
if (rows(__vtype) /= colsf(fin)) and (rows(__vtype) /= 1);
errorlog "Invalid number of rows in __VTYPE";
retp(-99,0,0,0,0,0,0,0);
endif;
/* process variables */
{ vnames,indx } = indices(dataset,vars);
{ vlbl,vtype } = nametype(getname(dataset),__vtype);
vtype = vtype[indx];
/* Computation of max number of rows to read at one time */
if __row;
nr = __row;
else;
k = colsf(fin);
if __miss == 2;
fac = 4;
else;
fac = 3;
endif;
nr = floor(minc(coreleft/(k*8*fac)|maxvec/(k+1)));
endif;
tobs = rowsf(fin);
kk = rows(indx);
k = colsf(fin);
min = reshape(__INFp,kk,1); /* positive infinity */
max = reshape(__INFn,kk,1); /* negative infinity */
clear sum,sumsq,valid;
do until eof(fin);
z = readr(fin,nr);
z = z[.,indx];
if __miss == 1; /* listwise deletion */
z = packr(z);
if scalmiss(z);
continue;
endif;
min = minc(z|min');
max = maxc(z|max');
valid = valid + rows(z);
ndpclex;
elseif __miss == 2; /* pairwise deletion */
missing = zeros(kk,1);
i = 1;
do until i > kk;
missing[i] = missing[i] + counts(z[.,i],error(0));
i = i+1;
endo;
min = minc(missrv(z,__INFp)|min');
max = maxc(missrv(z,__INFn)|max');
valid = valid + rows(z) - missing;
z = missrv(z,0);
ndpclex;
else;
min = minc(z|min');
max = maxc(z|max');
valid = valid + rows(z);
endif;
sum = sumc(z) + sum;
sumsq = sumc(z.*z) + sumsq;
endo;
fin = close(fin);
if valid $== 0;
if not trapchk(1);
errorlog "Too many missings - no data left after packing";
else;
call ndpcntrl(old,0xffff);
ndpclex;
retp(0,0,0,0,0,0,0,error(99));
endif;
endif;
mean = sum./valid;
var = (sumsq-valid.*(mean.*mean))./( valid + (valid .== 1) - 1 );
var = missex(var,valid.==1);
var = maxc(var'|zeros(1,rows(var)));
std = sqrt(var);
missing = tobs-valid;
mean = missex(mean,(.not vtype));
var = missex(var,(.not vtype));
var = missex(var,var .== 0);
std = missex(std,(.not vtype));
std = missex(std,std .== 0);
min = missex(min,(.not vtype));
max = missex(max,(.not vtype));
call ndpcntrl(old,0xffff);
ndpclex;
retp(vnames,mean,var,std,min,max,valid,missing);
clear vlbl; /* never executed */
endp;
/*
**> _dstatx
**
** Purpose: This is used if a matrix in memory is passed into dstat.
**
** Format: { vnames,mean,var,std,min,max,valid,missing } = _dstatx(x);
**
** Input: x NxK matrix, the matrix in memory.
**
** Output: vnam Kx1 character vector, the names of the variables
** used in the statistics.
**
** mean Kx1 vector, means.
**
** var Kx1 vector, variance.
**
** std Kx1 vector, standard deviation.
**
** min Kx1 vector, minima.
**
** max Kx1 vector, maxima.
**
** valid Kx1 vector, the number of valid cases.
**
** mis Kx1 vector, the number of missing cases.
**
*/
proc (8) = _dstatx(x);
local i,vnames,k,tobs,valid,var,mean,std,max,min,missing,sum,
sumsq,old,vtype;
tobs = rows(x);
k = cols(x);
if __altnam $/= 0;
if rows(__altnam) $/= k;
errorlog "Invalid number of rows in __ALTNAM";
ndpclex;
retp(-99,0,0,0,0,0,0,0);
else;
vnames = __altnam;
endif;
else;
vnames = 0 $+ "X" $+ ftocv(seqa(1,1,k),floor(log(k)),0);
endif;
if (rows(__vtype) /= k) and (rows(__vtype) /= 1);
errorlog "Invalid number of rows in __VTYPE";
retp(-99,0,0,0,0,0,0,0);
endif;
{ vnames,vtype } = nametype(vnames,__vtype);
old = ndpcntrl(0,0);
call ndpcntrl(1,1);
missing = zeros(k,1);
if __miss == 1; /* listwise deletion */
x = packr(x);
if scalmiss(x);
if not trapchk(1);
errorlog "Too many missings - no data left after packing";
else;
retp(0,0,0,0,0,error(99));
endif;
endif;
min = minc(x);
max = maxc(x);
valid = rows(x);
ndpclex;
elseif __miss == 2; /* pairwise deletion */
i = 1;
do until i > k;
missing[i] = missing[i] + counts(x[.,i],error(0));
i = i+1;
endo;
min = minc(missrv(x,__INFp));
max = maxc(missrv(x,__INFn));
valid = rows(x) - missing;
x = missrv(x,0);
ndpclex;
else;
min = minc(x);
max = maxc(x);
valid = rows(x);
endif;
sum = sumc(x);
sumsq = sumc(x.*x);
mean = sum./valid;
var = (sumsq-valid.*(mean.*mean))./( valid + (valid .== 1) - 1);
var = maxc(var'|zeros(1,rows(var)));
std = sqrt(var);
missing = tobs - valid;
mean = missex(mean,(.not vtype));
var = missex(var,valid.==1);
var = missex(var,(.not vtype));
var = missex(var,var .== 0);
std = missex(std,(.not vtype));
std = missex(std,std .== 0);
min = missex(min,(.not vtype));
max = missex(max,(.not vtype));
ndpclex;
call ndpcntrl(old,0xffff);
retp(vnames,mean,var,std,min,max,valid,missing);
endp;
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?