📄 rfutils.c

📁 是基于linux系统的C++程序
💻 C
字号:
/*******************************************************************   Copyright (C) 2001-7 Leo Breiman, Adele Cutler and Merck & Co., Inc.     This program is free software; you can redistribute it and/or   modify it under the terms of the GNU General Public License   as published by the Free Software Foundation; either version 2   of the License, or (at your option) any later version.    This program is distributed in the hope that it will be useful,   but WITHOUT ANY WARRANTY; without even the implied warranty of   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the   GNU General Public License for more details.                            *******************************************************************/#include <R.h>#include "rf.h"void zeroInt(int *x, int length) {    memset(x, 0, length * sizeof(int)); }void zeroDouble(double *x, int length) {    memset(x, 0, length * sizeof(double)); }void createClass(double *x, int realN, int totalN, int mdim) {/* Create the second class by bootstrapping each variable independently. */    int i, j, k;    for (i = realN; i < totalN; ++i) {        for (j = 0; j < mdim; ++j) {            k = (int) unif_rand() * realN;            x[j + i * mdim] = x[j + k * mdim];        }    }}void normClassWt(int *cl, const int nsample, const int nclass,                  const int useWt, double *classwt, int *classFreq) {    int i;    double sumwt = 0.0;        if (useWt) {        /* Normalize user-supplied weights so they sum to one. */        for (i = 0; i < nclass; ++i) sumwt += classwt[i];        for (i = 0; i < nclass; ++i) classwt[i] /= sumwt;    } else {        for (i = 0; i < nclass; ++i) {            classwt[i] = ((double) classFreq[i]) / nsample;        }    }       for (i = 0; i < nclass; ++i) {        classwt[i] = classFreq[i] ? classwt[i] * nsample / classFreq[i] : 0.0;    }}void makeA(double *x, const int mdim, const int nsample, int *cat, int *a,            int *b) {    /* makeA() constructs the mdim by nsample integer array a.  For each        numerical variable with values x(m, n), n=1, ...,nsample, the x-values        are sorted from lowest to highest.  Denote these by xs(m, n).  Then        a(m,n) is the case number in which xs(m, n) occurs. The b matrix is        also contructed here.  If the mth variable is categorical, then        a(m, n) is the category of the nth case number. */    int i, j, n1, n2, *index;    double *v;    v     = (double *) Calloc(nsample, double);    index = (int *) Calloc(nsample, int);    for (i = 0; i < mdim; ++i) {        if (cat[i] == 1) { /* numerical predictor */            for (j = 0; j < nsample; ++j) {                v[j] = x[i + j * mdim];                index[j] = j + 1;            }            R_qsort_I(v, index, 1, nsample);            /*  this sorts the v(n) in ascending order. index(n) is the case                 number of that v(n) nth from the lowest (assume the original                 case numbers are 1,2,...).  */            for (j = 0; j < nsample-1; ++j) {                n1 = index[j];                n2 = index[j + 1];                a[i + j * mdim] = n1;                if (j == 0) b[i + (n1-1) * mdim] = 1;                b[i + (n2-1) * mdim] =  (v[j] < v[j + 1]) ?                    b[i + (n1-1) * mdim] + 1 : b[i + (n1-1) * mdim];            }            a[i + (nsample-1) * mdim] = index[nsample-1];        } else { /* categorical predictor */            for (j = 0; j < nsample; ++j)                 a[i + j*mdim] = (int) x[i + j * mdim];        }    }    Free(index);    Free(v);}void modA(int *a, int *nuse, const int nsample, const int mdim,	  int *cat, const int maxcat, int *ncase, int *jin) {    int i, j, k, m, nt;    *nuse = 0;    for (i = 0; i < nsample; ++i) if (jin[i]) (*nuse)++;        for (i = 0; i < mdim; ++i) {      k = 0;      nt = 0;      if (cat[i] == 1) {          for (j = 0; j < nsample; ++j) {              if (jin[a[i + k * mdim] - 1]) {                  a[i + nt * mdim] = a[i + k * mdim];                  k++;              } else {                  for (m = 0; m < nsample - k; ++m) {                      if (jin[a[i + (k + m) * mdim] - 1]) {                          a[i + nt * mdim] = a[i + (k + m) * mdim];                          k += m + 1;                          break;                      }                  }              }              nt++;              if (nt >= *nuse) break;          }      }    }    if (maxcat > 1) {        k = 0;        nt = 0;        for (i = 0; i < nsample; ++i) {            if (jin[k]) {                k++;                ncase[nt] = k;            } else {                for (j = 0; j < nsample - k; ++j) {                    if (jin[k + j]) {                        ncase[nt] = k + j + 1;                        k += j + 1;                        break;                    }                }            }            nt++;            if (nt >= *nuse) break;        }    }}void Xtranslate(double *x, int mdim, int nrnodes, int nsample, 		int *bestvar, int *bestsplit, int *bestsplitnext,		double *xbestsplit, int *nodestatus, int *cat, int treeSize) {/* this subroutine takes the splits on numerical variables and translates them back into x-values.  It also unpacks each categorical split into a  32-dimensional vector with components of zero or one--a one indicates that  the corresponding category goes left in the split.*/    int i, m;    for (i = 0; i < treeSize; ++i) {	if (nodestatus[i] == 1) {	    m = bestvar[i] - 1;	    if (cat[m] == 1) {		xbestsplit[i] = 0.5 * (x[m + (bestsplit[i] - 1) * mdim] +				       x[m + (bestsplitnext[i] - 1) * mdim]);	    } else {		xbestsplit[i] = (double) bestsplit[i];	    }	}    }}void permuteOOB(int m, double *x, int *in, int nsample, int mdim) {/* Permute the OOB part of a variable in x.  * Argument: *   m: the variable to be permuted *   x: the data matrix (variables in rows) *   in: vector indicating which case is OOB *   nsample: number of cases in the data *   mdim: number of variables in the data */    double *tp, tmp;    int i, last, k, nOOB = 0;        tp = (double *) Calloc(nsample, double);    for (i = 0; i < nsample; ++i) {	/* make a copy of the OOB part of the data into tp (for permuting) */	if (in[i] == 0) {            tp[nOOB] = x[m + i*mdim];            nOOB++;        }    }    /* Permute tp */    last = nOOB;    for (i = 0; i < nOOB; ++i) {	k = (int) last * unif_rand();	tmp = tp[last - 1];	tp[last - 1] = tp[k];	tp[k] = tmp;	last--;    }    /* Copy the permuted OOB data back into x. */    nOOB = 0;    for (i = 0; i < nsample; ++i) {	if (in[i] == 0) {            x[m + i*mdim] = tp[nOOB];            nOOB++;	}    }    Free(tp);}/* Compute proximity. */void computeProximity(double *prox, int oobprox, int *node, int *inbag,                       int *oobpair, int n) {/* Accumulate the number of times a pair of points fall in the same node.   prox:    n x n proximity matrix   oobprox: should the accumulation only count OOB cases? (0=no, 1=yes)   node:    vector of terminal node labels   inbag:   indicator of whether a case is in-bag   oobpair: matrix to accumulate the number of times a pair is OOB together   n:       total number of cases*/    int i, j;    for (i = 0; i < n; ++i) {        for (j = i+1; j < n; ++j) {            if (oobprox) {                /* if (jin[k] == 0 && jin[n] == 0) { */                if ((inbag[i] > 0) ^ (inbag[j] > 0)) {                    oobpair[j*n + i] ++;                    oobpair[i*n + j] ++;                    if (node[i] == node[j]) {                         prox[j*n + i] += 1.0;                        prox[i*n + j] += 1.0;                    }                }            } else {                if (node[i] == node[j]) {                    prox[j*n + i] += 1.0;                    prox[i*n + j] += 1.0;                }            }        }    }}int pack(int nBits, int *bits) {    int i = nBits, pack=0;    while (--i >= 0) pack += bits[i] << i;    return(pack);}void unpack(int pack, int *bits) {/* pack is a 4-byte integer.  The sub. returns icat, an integer array of    zeroes and ones corresponding to the coefficients in the binary expansion    of pack. */       int i;    for (i = 0; pack != 0; pack >>= 1, ++i) bits[i] = pack & 1;}#ifdef OLDdouble oldpack(int l, int *icat) {    /* icat is a binary integer with ones for categories going left      * and zeroes for those going right.  The sub returns npack- the integer */    int k;    double pack = 0.0;    for (k = 0; k < l; ++k) {	if (icat[k]) pack += R_pow_di(2.0, k);    }    return(pack);}void oldunpack(int l, int npack, int *icat) {/*       * npack is a long integer.  The sub. returns icat, an integer of zeroes and * ones corresponding to the coefficients in the binary expansion of npack. */       int i;    zeroInt(icat, 32);    icat[0] = npack % 2;     for (i = 1; i < l; ++i) {	npack = (npack - icat[i-1]) / 2;	icat[i] = npack % 2;    }}#endif /* OLD */
💿 文件大小 69 K
👤 上传用户 shicg666666
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#linux #程序
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -