datatran.src
来自「没有说明」· SRC 代码 · 共 840 行 · 第 1/2 页
SRC
840 行
/*
** datatran.src
** (C) Copyright 1988-1998 by Aptech Systems, Inc.
** All Rights Reserved.
**
** This Software Product is PROPRIETARY SOURCE CODE OF APTECH
** SYSTEMS, INC. This File Header must accompany all files using
** any portion, in whole or in part, of this Source Code. In
** addition, the right to create such files is strictly limited by
** Section 2.A. of the GAUSS Applications License Agreement
** accompanying this Software Product.
**
** If you wish to distribute any portion of the proprietary Source
** Code, in whole or in part, you must first obtain written
** permission from Aptech Systems.
**
** Format Line
** ===============================================================
** y = CODE(e,v); 31
** y = DELIF(x,e); 128
** y = DUMMYBR(x,v); 193
** y = DUMMYDN(x,v,p); 291
** y = DUMMY(x,v); 417
** y = MISSEX(x,e); 521
** y = RECODE(x,e,v); 582
** y = SELIF(x,e); 693
** y = SUBSTUTE(x,e,v); 757
*/
/*
**> code
**
** Purpose: Allows a new variable to be created (coded) with different
** values depending upon which one of a set of logical
** expressions is true.
**
** Format: y = code(e,v);
**
** Input: e NxK matrix of 1's and 0's. Each column of this matrix
** is created by a logical expression using "dot" conditional
** and boolean operators. Each of these expressions should
** return a column vector result. The columns are
** horizontally concatenated to produce e. If more than
** one of these vectors contains a 1 in any given row,
** the code function will terminate with an error message.
**
** v K+1x1 vector containing the values to be
** assigned to the new variable.
**
** Output: y Nx1 vector containing the new values.
**
** Remarks: If none of the K expressions is true, the new
** variable is assigned the default value, which is
** given by the last element of v.
**
** Example: let x1 = 0 /* column vector of original values */
** 5
** 10
** 15
** 20;
**
** let v = 1 /* column vector of new values */
** 2
** 3; /* the last element of v is the "default" */
**
** e1 = (0 .lt x1) .and (x1 .le 5); /* expression 1 */
** e2 = (5 .lt x1) .and (x1 .le 25); /* expression 2 */
**
** e = e1~e2; /* concatenate e1 & e2 to make a 1,0 mask with
** :: one less column than the number of new values
** :: in v.
** */
**
** y = code(e,v);
** ___________________________________________________________
**
** x1[5,1] = 0 /* column vector of original values */
** 5
** 10
** 15
** 20;
**
** v[3,1] = 1 2 3 (Note: v is a column vector)
**
** e[5,2] = 0 0 y[5,1] = 3
** 1 0 1
** 0 1 2
** 0 1 2
** 0 1 2
**
** For every row in e, if a 1 is in the first column,
** the first element of v is used. If a 1 is in the
** second column, the second element of v is used, and
** so on. If there are only zeros in the row, the last
** element of v is used. This is the default value.
**
** If there is more than one 1 in any row of e, the
** function will terminate with an error message.
**
** Globals: None
**
** See Also: recode, substute
*/
proc code(e,v);
    /*
    ** Builds an Nx1 vector by picking, for every row of the 0/1 mask e,
    ** the element of v that matches the column holding the 1.  Rows of e
    ** that are all zero receive the last element of v (the default).
    **
    ** Input:   e    NxK matrix of 0's and 1's, at most one 1 per row.
    **          v    K+1x1 vector of values; the last one is the default.
    **
    ** Output:  Nx1 vector of coded values.
    **
    ** Errors:  logs a message and ends the program if any row of e
    **          contains more than one 1.
    */
    local nv, hits;

    /* number of true expressions in each row of e (Nx1) */
    hits = sumc(e');

    /*
    ** The comparison must be made on a scalar.  The non-dot form
    ** "hits >= 2" is true only when EVERY row has two or more ones, so
    ** a mask with just some bad rows would slip through and produce
    ** sums of several v elements.  Testing the largest row count
    ** rejects the mask as soon as ANY row is ambiguous.
    */
    if maxc(hits) >= 2;
        errorlog "ERROR: E vector has too many ones for CODE";
        end;
    endif;

    if ismiss(v);
        /*
        ** v contains missings: swap them for the placeholder nv
        ** (derived from maxc(v)) so the matrix product can be formed,
        ** then turn every result equal to nv back into a missing.
        */
        nv = abs(maxc(v))*1.1+1;
        v = missrv(v,nv);
        retp(
            miss(
                e*trimr(v,0,1) + (.not hits)*trimr(v,rows(v)-1,0),
                nv
            )
        );
    else;
        /* first K elements of v via e, default (last element) elsewhere */
        retp( e*trimr(v,0,1) + (.not hits)*trimr(v,rows(v)-1,0) );
    endif;
endp;
/*
**> delif
**
** Purpose: Deletes rows from a matrix. The rows deleted are
** those for which there is a 1 in the corresponding
** row of e.
**
** Format: y = delif(x,e);
**
** Input: x NxK data matrix.
**
** e Nx1 logical vector (vector of 0's and 1's).
**
** Output: y MxK data matrix consisting of the rows of x for
** which there is a 0 in the corresponding row of
** e; y will be a scalar missing if no rows remain.
**
** Remarks: The input e will usually be generated by a logical
** expression. For instance, if x is a column vector,
** y = delif(x,x .> 100); will delete all rows of x that are
** greater than 100. The remaining rows of x will be assigned to y.
**
** Example: let x[3,3] = 0 10 20
** 30 40 50
** 60 70 80;
**
** /* logical vector */
** e = (x[.,1] .gt 0) .and (x[.,3] .lt 100);
** y = delif(x,e);
**
** Here is the resulting matrix y:
**
** 0 10 20
**
** All rows for which the elements in column 1 are
** greater than 0 and the elements in column 3 are less
** than 100 are deleted.
**
** Globals: None
**
** See Also: selif
*/
proc delif(x,e);
    /*
    ** Returns the rows of x whose matching entry in e is not 1.
    **
    ** Input:   x    NxK data matrix.
    **          e    Nx1 vector of 0's and 1's; a 1 marks a row to drop.
    **
    ** Output:  the rows of x that are kept.
    **
    ** Errors:  logs a message and ends the program if x or e has an
    **          imaginary part.
    */
    local keep;

    /* complex storage is tolerated only when there is no imaginary part */
    if iscplx(x);
        if hasimag(x);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        endif;
        x = real(x);
    endif;

    if iscplx(e);
        if hasimag(e);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        endif;
        e = real(e);
    endif;

    /*
    ** Pair each row number with e after its 1's have been turned into
    ** missings; packr then discards exactly the rows flagged for
    ** deletion, leaving the row numbers to keep in column 1.
    */
    keep = packr(seqa(1,1,rows(x))~miss(e,1));
    keep = submat(keep,0,1);

    retp( submat(x,keep,0) );
endp;
/*
**> dummybr
**
** Purpose: Creates a set of dummy (0/1) variables by "breaking
** up" a variable into specified categories. The
** highest (right-most) category is bounded on the
** right.
**
** Format: y = dummybr(x,v);
**
** Input: x Nx1 vector of data that is to be "broken up"
** into dummy variables.
**
** v Kx1 vector specifying the k breakpoints (these
** must be in ascending order) that determine the
** k categories to be used in computing the dummy
** variables.
**
** Output: y NxK matrix containing the k dummy variables.
**
** Remarks: Missings are deleted before the dummy variables are
** created.
**
** All categories are open on the left (i.e., do not
** contain their left boundaries) and are closed on the
** right (i.e., do contain their right boundaries).
** Thus, k breakpoints are required to specify k dummy
** variables.
**
** The function dummy is similar to dummybr, but in
** that function the highest category is unbounded on
** the right.
**
** Example: let x = 0 2 4 6;
** v = 1|5|7;
** y = dummybr(x,v);
**
** The resulting matrix y looks like this:
**
** 1 0 0
** 0 1 0
** 0 1 0
** 0 0 1
**
** The vector v=1|5|7 will produce 3 dummies satisfying
** the following conditions:
**
** x .<= 1
** 1 .< x .and x .<= 5
** 5 .< x .and x .<= 7.
**
** Globals: None
**
** See Also: dummydn, dummy
*/
proc dummybr(x,v);
    /*
    ** Turns the column x into one 0/1 column per breakpoint in v.
    ** Column 1 flags x <= v[1]; column j (j > 1) flags
    ** v[j-1] < x <= v[j].
    **
    ** Input:   x    Nx1 data vector (rows with missings are removed).
    **          v    Kx1 vector of ascending breakpoints.
    **
    ** Output:  matrix with one row per non-missing row of x and K columns.
    **
    ** Errors:  logs a message and ends the program on input with an
    **          imaginary part, when no rows survive the removal of
    **          missings, or when x has more than one column.
    */
    local nobs, ncol, ncat, dum, j;

    /* complex storage is tolerated only when there is no imaginary part */
    if iscplx(x);
        if hasimag(x);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        endif;
        x = real(x);
    endif;

    if iscplx(v);
        if hasimag(v);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        endif;
        v = real(v);
    endif;

    /* drop rows containing missings before classifying */
    x = packr(x);
    if scalmiss(x);
        errorlog "ERROR: NO ROWS REMAIN AFTER DELETING ROWS WITH MISSINGS";
        end;
    endif;

    nobs = rows(x);
    ncol = cols(x);
    if ncol > 1;
        errorlog "ERROR: X cannot have more than 1 column.";
        end;
    endif;

    ncat = rows(v);
    dum = zeros(nobs,ncat);

    /* lowest category has no left boundary */
    dum[.,1] = (x .<= v[1,.]);

    /* remaining categories: open on the left, closed on the right */
    j = 2;
    do while j <= ncat;
        dum[.,j] = ((v[j-1,.] .< x) .and (x .<= v[j,.]));
        j = j+1;
    endo;

    retp(dum);
endp;
/*
**> dummydn
**
** Purpose: Creates a set of dummy (0/1) variables by "breaking
** up" a variable into specified categories. The
** highest (right-most) category is unbounded on the
** right, and a specified column of dummies is dropped.
**
** Format: y = dummydn(x,v,p);
**
** Input: x Nx1 vector of data to be "broken up" into dummy
** variables.
**
** v K-1x1 vector specifying the k-1 breakpoints
** (these must be in ascending order) that
** determine the k categories to be used in
** computing the dummy variables.
**
** p positive integer in the range [1,k], specifying
** which column should be dropped in the matrix of
** dummy variables.
**
** Output: y NxK-1 matrix containing the k-1 dummy variables.
**
** Remarks: This is just like the function dummy, except that
** the pth column of the matrix of dummies is dropped.
** This ensures that the columns of the matrix of
** dummies do not sum to 1, and so these variables will
** not be collinear with a vector of ones.
**
** Missings are deleted before the dummy variables are
** created.
**
** All categories are open on the left (i.e., do not
** contain their left boundaries) and all but the
** highest are closed on the right (i.e., do contain
** their right boundaries). The highest (right-most)
** category is unbounded on the right. Thus, only k-1
** breakpoints are required to specify k dummy
** variables.
**
** Example: let x = 0 2 4 6;
** v = 1|5|7; p = 2;
** y = dummydn(x,v,p);
**
** The resulting matrix y looks like this:
**
** 1 0 0
** 0 0 0
** 0 0 0
** 0 1 0
**
** The vector v=1|5|7 will produce 4 dummies satisfying
** the following conditions:
**
** x .<= 1
** 1 .< x .and x .<= 5
** 5 .< x .and x .<= 7
** 7 .< x.
**
** Since p equals 2, the second dummy is dropped.
**
** Globals: None
**
** See Also: dummy, dummybr
*/
proc dummydn(x,v,p);
    /*
    ** Turns the column x into rows(v)+1 0/1 category columns and then
    ** removes column p.  Column 1 flags x <= v[1]; column j flags
    ** v[j-1] < x <= v[j]; the last column flags x above the final
    ** breakpoint.
    **
    ** Input:   x    Nx1 data vector (rows with missings are removed).
    **          v    vector of ascending breakpoints.
    **          p    index of the dummy column to drop; rounded to an
    **               integer and required to lie in [1, rows(v)+1].
    **
    ** Output:  matrix with one row per non-missing row of x and
    **          rows(v) columns.
    **
    ** Errors:  logs a message and ends the program on input with an
    **          imaginary part, when no rows survive the removal of
    **          missings, when x has more than one column, or when p is
    **          out of range.
    */
    local nobs, ncol, nbrk, ncat, dum, j, colidx, dropmask, keep;

    /* complex storage is tolerated only when there is no imaginary part */
    if iscplx(x);
        if hasimag(x);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        endif;
        x = real(x);
    endif;

    if iscplx(v);
        if hasimag(v);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        endif;
        v = real(v);
    endif;

    if iscplx(p);
        if hasimag(p);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        endif;
        p = real(p);
    endif;

    /* drop rows containing missings before classifying */
    x = packr(x);
    if scalmiss(x);
        errorlog "ERROR: NO ROWS REMAIN AFTER DELETING ROWS WITH MISSINGS";
        end;
    endif;

    nobs = rows(x);
    ncol = cols(x);
    if ncol > 1;
        errorlog "ERROR: X cannot have more than 1 column.";
        end;
    endif;

    nbrk = rows(v);        /* number of breakpoints */
    ncat = nbrk+1;         /* one more category than breakpoints */

    p = round(p);          /* make sure p is integer */
    if p < 1 or p > ncat;
        errorlog "ERROR: The third argument (P) of DUMMYDN is out of range.";
        end;
    endif;

    dum = zeros(nobs,ncat);

    /* lowest category has no left boundary */
    dum[.,1] = (x .<= v[1,.]);

    /* interior categories: open on the left, closed on the right */
    j = 2;
    do while j <= nbrk;
        dum[.,j] = ((v[j-1,.] .< x) .and (x .<= v[j,.]));
        j = j+1;
    endo;

    /* highest category has no right boundary */
    dum[.,ncat] = (v[nbrk,.] .< x);

    /*
    ** Build the list of columns to keep: flag column p, turn the flag
    ** into a missing, and let packr discard that entry.
    */
    colidx = seqa(1,1,ncat);
    dropmask = colidx .== p;
    keep = packr(seqa(1,1,rows(colidx))~miss(dropmask,1));
    colidx = submat(colidx,submat(keep,0,1),0);

    retp( submat(dum,0,colidx) );
endp;
/*
**> dummy
**
** Purpose: Creates a set of dummy (0/1) variables by "breaking
** up" a variable into specified categories. The
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?