datatran.src

来自「没有说明」· SRC 代码 · 共 840 行 · 第 1/2 页

SRC
840
字号
/*
** datatran.src
** (C) Copyright 1988-1998 by Aptech Systems, Inc.
** All Rights Reserved.
**
** This Software Product is PROPRIETARY SOURCE CODE OF APTECH
** SYSTEMS, INC.    This File Header must accompany all files using
** any portion, in whole or in part, of this Source Code.   In
** addition, the right to create such files is strictly limited by
** Section 2.A. of the GAUSS Applications License Agreement
** accompanying this Software Product.
**
** If you wish to distribute any portion of the proprietary Source
** Code, in whole or in part, you must first obtain written
** permission from Aptech Systems.
**
**  Format                                                   Line
** ===============================================================
** y = CODE(e,v);                                             31
** y = DELIF(x,e);                                           128
** y = DUMMYBR(x,v);                                         193
** y = DUMMYDN(x,v,p);                                       291
** y = DUMMY(x,v);                                           417
** y = MISSEX(x,e);                                          521
** y = RECODE(x,e,v);                                        582
** y = SELIF(x,e);                                           693
** y = SUBSTUTE(x,e,v);                                      757
*/

/*
**> code
**
**  Purpose:    Allows a new variable to be created (coded) with different
**              values depending upon which one of a set of logical
**              expressions is true.
**
**  Format:     y = code(e,v);
**
**  Input:      e    NxK matrix of 1's and 0's.  Each column of this matrix
**                   is created by a logical expression using "dot" conditional
**                   and boolean operators.  Each of these expressions should
**                   return a column vector result.  The columns are
**                   horizontally concatenated to produce e.  If more than
**                   one of these vectors contains a 1 in any given row,
**                   the code function will terminate with an error message.
**
**              v    K+1x1 vector containing the values to be
**                   assigned to the new variable.
**
**  Output:     y    Nx1 vector containing the new values.
**
**  Remarks:    If none of the K expressions is true, the new
**              variable is assigned the default value, which is
**              given by the last element of v.
**
**  Example:    let x1 = 0      /* column vector of original values */
**                       5
**                       10
**                       15
**                       20;
**
**              let v =  1      /* column vector of new values */
**                       2
**                       3;     /* the last element of v is the "default" */
**
**              e1 = (0 .lt x1) .and (x1 .le 5);        /* expression 1 */
**              e2 = (5 .lt x1) .and (x1 .le 25);       /* expression 2 */
**
**              e = e1~e2;   /* concatenate e1 & e2 to make a 1,0 mask with
**                           :: one less column than the number of new values
**                           :: in v.
**                           */
**
**              y = code(e,v);
**              ___________________________________________________________
**
**              x1[5,1] =   0      /* column vector of original values */
**                          5
**                          10
**                          15
**                          20;
**
**              v[3,1] =    1     2    3     (Note: v is a column vector)
**
**              e[5,2] =    0     0          y[5,1] =    3
**                          1     0                      1
**                          0     1                      2
**                          0     1                      2
**                          0     1                      2
**
**              For every row in e, if a 1 is in the first column,
**              the first element of v is used.  If a 1 is in the
**              second column, the second element of v is used, and
**              so on.  If there are only zeros in the row, the last
**              element of v is used.  This is the default value.
**
**              If there is more than one 1 in any row of e, the
**              function will terminate with an error message.
**
**  Globals:    None
**
**  See Also:   recode, substute
*/

proc code(e,v);
    local nv, hits;

    /*
    ** Build the Nx1 coded vector: row i receives the element of v that
    ** lines up with the single column of e holding a 1, or the last
    ** element of v when row i of e contains only zeros.
    **
    **   e    NxK matrix of 0's and 1's, at most one 1 per row.
    **   v    K+1x1 vector of values; the last element is the default.
    **
    ** The program is ended with an error message if any row of e
    ** contains more than one 1.
    */

    hits = sumc(e');    /* Nx1: number of 1's found in each row of e */

    /*
    ** A non-dot comparison on a vector is true only when it holds for
    ** every element, so "hits >= 2" would reject e only if ALL rows
    ** were bad.  Testing the largest count catches a single bad row.
    */
    if maxc(hits) >= 2;
        errorlog "ERROR: E vector has too many ones for CODE";
        end;
    endif;

    if ismiss(v);
        /*
        ** v contains missings: swap them for a placeholder value nv,
        ** do the arithmetic, then turn nv back into missing in the
        ** result.  nv is built from the largest element of v and is
        ** intended not to coincide with any genuine value in v.
        */
        nv = abs(maxc(v))*1.1+1;
        v = missrv(v,nv);
        retp(
            miss(
                    e*trimr(v,0,1) + (.not hits)*trimr(v,rows(v)-1,0),
                    nv
                )
            );
    else;
        /*
        ** e*trimr(v,0,1) picks the value for rows with a 1; the second
        ** term supplies the last element of v to rows with no 1 at all.
        */
        retp( e*trimr(v,0,1) + (.not hits)*trimr(v,rows(v)-1,0) );
    endif;
endp;

/*
**> delif
**
**  Purpose:    Deletes rows from a matrix. The rows deleted are
**              those for which there is a 1 in the corresponding
**              row of e.
**
**  Format:     y = delif(x,e);
**
**  Input:      x    NxK data matrix.
**
**              e    Nx1 logical vector (vector of 0's and 1's).
**
**  Output:     y    MxK data matrix consisting of the rows of x for
**                   which there is a 0 in the corresponding row of
**                   e; y will be a scalar missing if no rows remain.
**
**  Remarks:    The input e will usually be generated by a logical
**              expression.  For instance, if x is a column vector,
**              y = delif(x,x .> 100); will delete all rows of x whose
**              value is greater than 100.  The remaining rows of x
**              will be assigned to y.
**
**  Example:    let x[3,3] = 0 10 20
**                          30 40 50
**                          60 70 80;
**
**              /* logical vector */
**              e = (x[.,1] .gt 0) .and (x[.,3] .lt 100);
**              y = delif(x,e);
**
**              Here is the resulting matrix y:
**
**                   0     10     20
**
**              All rows for which the elements in column 1 are
**              greater than 0 and the elements in column 3 are less
**              than 100 are deleted.
**
**  Globals:    None
**
**  See Also:   selif
*/

proc delif(x,e);
    local keep;

    /*
    ** Return the rows of x whose corresponding element of e is not 1.
    **
    **   x    NxK data matrix.
    **   e    Nx1 logical vector; a 1 marks a row for deletion.
    **
    ** Returns a scalar missing when every row is marked for deletion.
    ** Complex arguments are accepted only when they have no imaginary
    ** part; otherwise the program is ended with an error message.
    */

    /* check for complex input */
    if iscplx(x);
        if hasimag(x);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        else;
            x = real(x);
        endif;
    endif;
    if iscplx(e);
        if hasimag(e);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        else;
            e = real(e);
        endif;
    endif;

    /*
    ** Pair each row number with e after its 1's have been turned into
    ** missings; packr then drops exactly the rows marked for deletion.
    */
    keep = packr(seqa(1,1,rows(x))~miss(e,1));

    /*
    ** packr hands back a scalar missing when nothing survives.  Return
    ** a scalar missing here instead of using it as a row index into x.
    */
    if scalmiss(keep);
        retp( miss(0,0) );
    endif;

    /* first column of keep holds the surviving row numbers */
    retp( submat(x,submat(keep,0,1),0) );
endp;

/*
**> dummybr
**
**  Purpose:    Creates a set of dummy (0/1) variables by "breaking
**              up" a variable into specified categories. The
**              highest (right-most) category is bounded on the
**              right.
**
**  Format:     y = dummybr(x,v);
**
**  Input:      x    Nx1 vector of data that is to be "broken up"
**                   into dummy variables.
**
**              v    Kx1 vector specifying the k breakpoints (these
**                   must be in ascending order) that determine the
**                   k categories to be used in computing the dummy
**                   variables.
**
**  Output:     y    NxK matrix containing the k dummy variables.
**
**  Remarks:    Missings are deleted before the dummy variables are
**              created.
**
**              All categories are open on the left (i.e., do not
**              contain their left boundaries) and are closed on the
**              right (i.e., do contain their right boundaries).
**              Thus, k breakpoints are required to specify k dummy
**              variables.
**
**              The function dummy is similar to dummybr, but in
**              that function the highest category is unbounded on
**              the right.
**
**  Example:    let x = 0 2 4 6;
**              v = 1|5|7;
**              y = dummybr(x,v);
**
**              The resulting matrix y looks like this:
**
**                   1     0     0
**                   0     1     0
**                   0     1     0
**                   0     0     1
**
**              The vector v=1|5|7 will produce 3 dummies satisfying
**              the following conditions:
**
**                   x .<= 1
**                   1 .< x .and x .<= 5
**                   5 .< x .and x .<= 7.
**
**  Globals:    None
**
**  See Also:   dummydn, dummy
*/

proc dummybr(x,v);
    local nobs, ncat, dum, j;

    /*
    ** Turn the column x into one 0/1 column per breakpoint in v.
    ** Column 1 marks x <= v[1]; column j (j > 1) marks
    ** v[j-1] < x <= v[j].  Rows of x holding missings are dropped
    ** before anything is computed.
    */

    /*
    ** Complex arguments are only usable when their imaginary part is
    ** empty.  "end" stops the program, so the real() conversion below
    ** each check is reached only for such arguments.
    */
    if iscplx(x);
        if hasimag(x);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        endif;
        x = real(x);
    endif;
    if iscplx(v);
        if hasimag(v);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        endif;
        v = real(v);
    endif;

    /* drop rows with missings; a scalar missing means none are left */
    x = packr(x);
    if scalmiss(x);
        errorlog "ERROR: NO ROWS REMAIN AFTER DELETING ROWS WITH MISSINGS";
        end;
    endif;

    /* only a single column of data can be broken up */
    if cols(x) > 1;
        errorlog "ERROR: X cannot have more than 1 column.";
        end;
    endif;

    nobs = rows(x);
    ncat = rows(v);
    dum = zeros(nobs,ncat);

    /* lowest category has no left boundary */
    dum[.,1] = (x .<= v[1,.]);

    /* remaining categories: open on the left, closed on the right */
    j = 1;
    do while j < ncat;
        j = j+1;
        dum[.,j] = ((v[j-1,.] .< x) .and (x .<= v[j,.]));
    endo;

    retp(dum);
endp;

/*
**> dummydn
**
**  Purpose:    Creates a set of dummy (0/1) variables by "breaking
**              up" a variable into specified categories. The
**              highest (right-most) category is unbounded on the
**              right, and a specified column of dummies is dropped.
**
**  Format:     y = dummydn(x,v,p);
**
**  Input:      x    Nx1 vector of data to be "broken up" into dummy
**                   variables.
**
**              v    (K-1)x1 vector specifying the k-1 breakpoints
**                   (these must be in ascending order) that
**                   determine the k categories to be used in
**                   computing the dummy variables.
**
**              p    positive integer in the range [1,k], specifying
**                   which column should be dropped in the matrix of
**                   dummy variables.
**
**  Output:     y    Nx(K-1) matrix containing the k-1 dummy variables.
**
**  Remarks:    This is just like the function dummy, except that
**              the pth column of the matrix of dummies is dropped.
**              This ensures that the columns of the matrix of
**              dummies do not sum to 1, and so these variables will
**              not be collinear with a vector of ones.
**
**              Missings are deleted before the dummy variables are
**              created.
**
**              All categories are open on the left (i.e., do not
**              contain their left boundaries) and all but the
**              highest are closed on the right (i.e., do contain
**              their right boundaries). The highest (right-most)
**              category is unbounded on the right. Thus, only k-1
**              breakpoints are required to specify k dummy
**              variables.
**
**  Example:    let x = 0 2 4 6;
**              v = 1|5|7;  p = 2;
**              y = dummydn(x,v,p);
**
**              The resulting matrix y looks like this:
**
**                   1     0     0
**                   0     0     0
**                   0     0     0
**                   0     1     0
**
**              The vector v=1|5|7 will produce 4 dummies satisfying
**              the following conditions:
**
**                   x .<= 1
**                   1 .< x .and x .<= 5
**                   5 .< x .and x .<= 7
**                   7 .< x.
**
**              Since p equals 2, the second dummy is dropped.
**
**  Globals:    None
**
**  See Also:   dummy, dummybr
*/

proc dummydn(x,v,p);
    local nobs, nbrk, dum, j, colno, dropped, kept;

    /*
    ** Turn the column x into 0/1 category columns using the
    ** breakpoints in v, then remove column p.  With rows(v)
    ** breakpoints there are rows(v)+1 categories before the drop:
    ** x <= v[1], v[j-1] < x <= v[j], and finally v[last] < x.
    ** Rows of x holding missings are dropped before anything is
    ** computed.
    */

    /*
    ** Complex arguments are only usable when their imaginary part is
    ** empty.  "end" stops the program, so the real() conversion below
    ** each check is reached only for such arguments.
    */
    if iscplx(x);
        if hasimag(x);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        endif;
        x = real(x);
    endif;
    if iscplx(v);
        if hasimag(v);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        endif;
        v = real(v);
    endif;
    if iscplx(p);
        if hasimag(p);
            errorlog "ERROR: Not implemented for complex matrices.";
            end;
        endif;
        p = real(p);
    endif;

    /* drop rows with missings; a scalar missing means none are left */
    x = packr(x);
    if scalmiss(x);
        errorlog "ERROR: NO ROWS REMAIN AFTER DELETING ROWS WITH MISSINGS";
        end;
    endif;

    /* only a single column of data can be broken up */
    if cols(x) > 1;
        errorlog "ERROR: X cannot have more than 1 column.";
        end;
    endif;

    nobs = rows(x);
    nbrk = rows(v);

    /* p must name one of the nbrk+1 category columns */
    p = round(p);           /* make sure p is integer */
    if p < 1 or p > nbrk+1;
        errorlog "ERROR: The third argument (P) of DUMMYDN is out of range.";
        end;
    endif;

    dum = zeros(nobs,nbrk+1);

    /* lowest category has no left boundary */
    dum[.,1] = (x .<= v[1,.]);

    /* interior categories: open on the left, closed on the right */
    j = 1;
    do while j < nbrk;
        j = j+1;
        dum[.,j] = ((v[j-1,.] .< x) .and (x .<= v[j,.]));
    endo;

    /* highest category has no right boundary */
    dum[.,nbrk+1] = (v[nbrk,.] .< x);

    /*
    ** Flag column p, turn the flag into a missing and let packr remove
    ** that row; what is left in the first column is the list of column
    ** numbers to keep.
    */
    colno = seqa(1,1,nbrk+1);
    dropped = (colno .== p);
    kept = submat(packr(colno~miss(dropped,1)),0,1);

    retp( submat(dum,0,kept) );
endp;

/*
**> dummy
**
**  Purpose:    Creates a set of dummy (0/1) variables by "breaking
**              up" a variable into specified categories. The

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?