📄 randomforest.cpp
字号:
if (i >= j) {
goto L80;
}
L20:
k = i;
ij = (j + i) / 2;
t = iperm[ij];
vt = v[ij];
if (v[i] <= vt) {
goto L30;
}
iperm[ij] = iperm[i];
iperm[i] = t;
t = iperm[ij];
v[ij] = v[i];
v[i] = vt;
vt = v[ij];
L30:
l = j;
if (v[j] >= vt) {
goto L50;
}
iperm[ij] = iperm[j];
iperm[j] = t;
t = iperm[ij];
v[ij] = v[j];
v[j] = vt;
vt = v[ij];
if (v[i] <= vt) {
goto L50;
}
iperm[ij] = iperm[i];
iperm[i] = t;
t = iperm[ij];
v[ij] = v[i];
v[i] = vt;
vt = v[ij];
goto L50;
L40:
iperm[l] = iperm[k];
iperm[k] = tt;
v[l] = v[k];
v[k] = vtt;
L50:
--l;
if (v[l] > vt) {
goto L50;
}
tt = iperm[l];
vtt = v[l];
L60:
++k;
if (v[k] < vt) {
goto L60;
}
if (k <= l) {
goto L40;
}
if (l - i <= j - k) {
goto L70;
}
il[m - 1] = i;
iu[m - 1] = l;
i = k;
++m;
goto L90;
L70:
il[m - 1] = k;
iu[m - 1] = j;
j = l;
++m;
goto L90;
L80:
--m;
if (m == 0) {
return 0;
}
i = il[m - 1];
j = iu[m - 1];
L90:
if (j - i > 10) {
goto L20;
}
if (i == *ii) {
goto L10;
}
--i;
L100:
++i;
if (i == j) {
goto L80;
}
t = iperm[i + 1];
vt = v[i + 1];
if (v[i] <= vt) {
goto L100;
}
k = i;
L110:
iperm[k + 1] = iperm[k];
v[k + 1] = v[k];
--k;
if (vt < v[k]) {
goto L110;
}
iperm[k + 1] = t;
v[k + 1] = vt;
goto L100;
return(0);
} /* end quicksort */
double randomForest::rrand(int *j,float *ret_val)
{
/* Initialized data */
static int dseed = 17395.;
static double u;
randomForest *zer_array = new randomForest(1,1);
zer_array->lrnd(&dseed, &u);
*ret_val = (float) u;
delete zer_array;
return 0;
} /* end rrand */
/*
 * lrnd -- advance a multiplicative congruential generator by one step.
 *
 *   *dseed <- |*dseed * 16087| mod (2^31 - 1)
 *   *u     <- *dseed / (2^31 - 1)
 *
 * dseed  in/out: generator state; on return 0 <= *dseed < 2^31 - 1.
 * u      out: the new state scaled into [0, 1) as a double.
 *
 * Always returns 0.
 */
int randomForest::lrnd(int *dseed, double *u)
{
    /* Modulus of the generator: 2^31 - 1. */
    static const int d31m1 = 2147483647;

    /* The product needs up to about 46 bits. Computing it in int overflows
       (undefined behaviour for signed integers) as soon as the state
       exceeds roughly 133,000, so it is formed in 64 bits instead. */
    long long d__1 = (long long) *dseed * 16087;

    /* Keep the original treatment of a negative seed: use the magnitude. */
    if (d__1 < 0) {
        d__1 = -d__1;
    }

    *dseed = (int) (d__1 % d31m1);
    *u = (double) *dseed / (double) d31m1;
    return 0;
} /* end lrnd_ */
/*
 * eqm -- copy the integer table k into the integer table j.
 *
 * Both tables use Fortran-style layout with leading dimension *m: the
 * element with 1-based indices (m1, n1) lives at (m1 - 1) + (n1 - 1) * *m.
 *
 * j        out: destination table.
 * k        in:  source table.
 * m        in:  number of values of the first index (and the leading
 *               dimension of both tables).
 * n        in:  default number of values of the second index to copy.
 *               It is overwritten while the routine runs and restored to
 *               its incoming value before returning.
 * ncolumn  in:  read at indices 1..*m without any base adjustment. A
 *               positive entry replaces the current copy count.
 *               NOTE(review): a non-positive entry keeps whatever count the
 *               previous first-index value left behind, not the incoming
 *               *n -- confirm this is intended.
 *
 * Always returns 0.
 */
int randomForest::eqm(int j[],int *k,int *m,int *n,int *ncolumn)
{
    const int saved_n = *n;     /* put back into *n on exit */
    const int stride = *m;      /* leading dimension shared by j and k */

    for (int lead = 1; lead <= *m; ++lead) {
        if (ncolumn[lead] > 0) {
            *n = ncolumn[lead];
        }

        /* 0-based position of element (lead, 1). */
        const int first = lead - 1;
        for (int step = 0; step < *n; ++step) {
            const int at = first + step * stride;
            j[at] = k[at];
        }
    }

    *n = saved_n;
    return 0;
} /* end eqm */
/*
 * moda -- restrict the index tables to the cases selected in jin.
 *
 * a        in/out: table with leading dimension *mdim; element (m, n) is
 *                  a case index (it is used to subscript jin). For every m
 *                  with cat[m] == 1 the selected entries of row m are
 *                  packed to the front, keeping their order.
 * nuse     out:    number of n in 1..*nsample with jin[n] == 1.
 * nsample  in:     number of cases. Overwritten while the routine runs and
 *                  restored to its incoming value before returning.
 * mdim     in:     number of values of the first index of a.
 * cat      in:     per-m flag; only rows with cat[m] == 1 are packed.
 * maxcat   in:     when > 1, ncase is filled as well.
 * ncase    out:    when *maxcat > 1, the selected case indices in
 *                  ascending order.
 * jin      in:     jin[n] == 1 marks case n as selected.
 * ta       unused.
 * ncolumn  in:     read at indices 1..*mdim without base adjustment; a
 *                  positive entry replaces the current value of *nsample.
 *
 * Always returns 0.
 */
int randomForest::moda(int a[],int *nuse,int *nsample,int *mdim,int cat[],int *maxcat,int ncase[],int jin[],int ta[],int *ncolumn)
{
    /* Taken on every call. The previous function-local static was
       initialised on the first call only, so later calls restored
       *nsample to a stale value. */
    const int nsample_orig = *nsample;
    const int a_dim1 = *mdim;
    int j, k, m, n, nt;

    (void)ta;

    /* Shift the bases so the arrays can be subscripted from 1, as in the
       Fortran original. */
    --jin;
    --ncase;
    --cat;
    a -= a_dim1 + 1;

    /* Count the selected cases. */
    *nuse = 0;
    for (n = 1; n <= *nsample; ++n) {
        if (jin[n] == 1) {
            ++(*nuse);
        }
    }

    for (m = 1; m <= *mdim; ++m) {
        if (cat[m] != 1) {
            continue;
        }

        /* NOTE(review): once a positive ncolumn[m] has replaced *nsample,
           later rows with ncolumn[m] <= 0 keep using that value rather
           than the incoming one -- confirm this is intended. */
        if (ncolumn[m] > 0) {
            *nsample = ncolumn[m];
        }

        k = 1;      /* next position of row m to examine */
        nt = 1;     /* next position of row m to fill */
        for (n = 1; n <= *nsample; ++n) {
            if (jin[a[m + k * a_dim1]] == 1) {
                a[m + nt * a_dim1] = a[m + k * a_dim1];
                ++k;
            } else {
                /* Look ahead for the next selected entry. If none is
                   found, slot nt is left as it was. */
                for (j = 1; j <= (*nsample - k); ++j) {
                    if (jin[a[m + (k + j) * a_dim1]] == 1) {
                        a[m + nt * a_dim1] = a[m + (k + j) * a_dim1];
                        k = k + j + 1;
                        break;
                    }
                }
            }
            ++nt;
            if (nt > *nuse) {
                break;      /* all selected cases have been placed */
            }
        }
    }

    if (*maxcat > 1) {
        /* NOTE(review): *nsample may still hold the last ncolumn override
           from the loop above rather than the incoming value -- confirm
           this is intended. */
        k = 1;
        nt = 1;
        for (n = 1; n <= *nsample; ++n) {
            if (jin[k] == 1) {
                ncase[nt] = k;
                ++k;
            } else {
                for (j = 1; j <= (*nsample - k); ++j) {
                    if (jin[k + j] == 1) {
                        ncase[nt] = k + j;
                        k = k + j + 1;
                        break;
                    }
                }
            }
            ++nt;
            if (nt > *nuse) {
                break;
            }
        }
    }

    *nsample = nsample_orig;
    return 0;
} /* moda */
/*
 * buildtree -- grow one classification tree into the flat per-node arrays.
 *
 * Nodes are numbered from 1; node 1 is the root. Splitting node kbuild
 * creates node ncur+1 (left) and node ncur+2 (right), which are recorded
 * in treemap and parent. Nodes are visited in increasing node number.
 *
 * nodestatus codes used in this function:
 *    2   node still has to be examined for a split
 *    1   node has been split
 *   -1   terminal node
 *    0   slot never used (only tested here; the array is handed to zerv
 *        first -- presumably that zero-fills it, confirm in zerv)
 *
 * Inputs read directly here:
 *   cl[nc]     class of case nc (used as the first index of classpop)
 *   win[nc]    weight of case nc, accumulated into classpop
 *   cat[m]     == 1 selects the branch that reads the split from a
 *   tclasspop  on entry, the class populations of the root node
 *   *ndsize    a child with nodepop <= *ndsize becomes terminal
 *   *nrnodes   number of node slots / upper bound of the main loop
 *   *nuse      population of the root node
 * a, b, jin, mtry, iv, wr, wc, wl, mred, tclasscat, ta, idmove and ncase
 * are only forwarded to findbestsplit and/or movedata (ncase is also read
 * back after movedata).
 *
 * Outputs written here: treemap, bestvar, bestsplit, bestsplitnext,
 * tgini (incremented), nodestatus, nodepop, nodestart, classpop, parent,
 * nodeclass and *ndbigtree. tclasspop is overwritten as scratch.
 *
 * The locals are declared static (f2c translation style), so concurrent
 * calls would share them.
 *
 * Always returns 0.
 */
int randomForest::buildtree(int a[],int b[],int cl[],int cat[],int *mdim,int *nsample,int *nclass,
int treemap[],int bestvar[],int bestsplit[],int bestsplitnext[],
float tgini[],int nodestatus[],int nodepop[],int nodestart[],
float classpop[],float tclasspop[],float tclasscat[],int ta[],
int *nrnodes,int idmove[],int *ndsize,int ncase[],int parent[],
int jin[],int *mtry,int iv[],int nodeclass[],int *ndbigtree,float win[],
float wr[],float wc[],float wl[],int *mred,int *nuse)
{
/* System generated locals */
int a_dim1, a_offset, b_dim1, b_offset, classpop_dim1, classpop_offset, tclasscat_dim1, tclasscat_offset;
/* Local variables */
static int ncur;
/* Block-scope declarations of free functions; the calls below go through
   zer_array-> instead, so these declarations are not used here. */
extern /* Subroutine */ int movedata(), zerv();
static float decsplit, popt1, popt2;
static int j, k, n, ndend, nbest, jstat;
extern /* Subroutine */ int zermr(), findbestsplit();
static int nc, kn;
static float pp;
static int ndendl, kbuild, msplit, ndstart;
/* Parameter adjustments: shift every array base so that it can be
   subscripted from 1. Two-index arrays are addressed as
   x[i + j * x_dim1] with x_dim1 the leading dimension. */
--tgini;
--cat;
--win;
--jin;
--ncase;
--idmove;
--ta;
--cl;
b_dim1 = *mdim;
b_offset = b_dim1 + 1;
b -= b_offset;
a_dim1 = *mdim;
a_offset = a_dim1 + 1;
a -= a_offset;
--wl;
--wc;
--wr;
tclasscat_dim1 = *nclass;
tclasscat_offset = tclasscat_dim1 + 1;
tclasscat -= tclasscat_offset;
--tclasspop;
--nodeclass;
--parent;
classpop_dim1 = *nclass;
classpop_offset = classpop_dim1 + 1;
classpop -= classpop_offset;
--nodestart;
--nodepop;
--nodestatus;
--bestsplitnext;
--bestsplit;
--bestvar;
/* treemap has leading dimension 2 (left child, right child), so its
   offset is 2 * 1 + 1 = 3. */
treemap -= 3;
--iv;
/* Function Body */
/* Helper object used only to reach the sibling member functions; it is
   deleted just before the single return at the end. */
randomForest *zer_array = new randomForest(1,1);
/* Reset the per-node arrays (presumably to zero -- see zerv / zermr). */
zer_array->zerv(&nodestatus[1], nrnodes);
zer_array->zerv(&nodestart[1], nrnodes);
zer_array->zerv(&nodepop[1], nrnodes);
zer_array->zermr(&classpop[classpop_offset], nclass, nrnodes);
/* Seed the class populations of the root (node 1) from tclasspop. */
for (j = 1; j <= *nclass; ++j)
{
classpop[j + classpop_dim1] = tclasspop[j];
}
/* ncur is the highest node number allocated so far. */
ncur = 1;
nodestart[1] = 1;
/* The root holds all *nuse cases and is waiting to be split. */
nodepop[1] = *nuse;
nodestatus[1] = 2;
/* start main loop */
/* NOTE(review): children are written at ncur + 1 and ncur + 2 while the
   exit test at the bottom only guarantees ncur < *nrnodes. ncur is always
   odd, so ncur + 2 <= *nrnodes holds only when *nrnodes is odd and >= 3 --
   confirm the caller guarantees that. */
for (kbuild = 1; kbuild <= *nrnodes; ++kbuild)
{
/* No node with this number has been created: the tree is complete. */
if (kbuild > ncur)
{
goto L50;
}
/* Only nodes still marked 2 are candidates for a split. */
if (nodestatus[kbuild] != 2)
{
goto L30;
}
/* initialize for next call to findbestsplit */
ndstart = nodestart[kbuild];
/* The node covers positions ndstart .. ndend (nodepop[kbuild] of them). */
ndend = ndstart + nodepop[kbuild] - 1;
/* Load this node's per-class populations into tclasspop for the call. */
for (j = 1; j <= *nclass; ++j)
{
tclasspop[j] = classpop[j + kbuild * classpop_dim1];
}
jstat = 0;
/* findbestsplit reports its result through msplit, decsplit, nbest and
   jstat. */
zer_array->findbestsplit(&a[a_offset], &b[b_offset], &cl[1], mdim, nsample,
nclass, &cat[1], &ndstart, &ndend, &tclasspop[1], &tclasscat[
tclasscat_offset], &msplit, &decsplit, &nbest, &ncase[1], &
jstat, &jin[1], mtry, &iv[1], &win[1], &wr[1], &wc[1], &wl[1],
mred, &kbuild);
/* jstat == 1 on return: the node is made terminal and not split. */
if (jstat == 1)
{
nodestatus[kbuild] = -1;
goto L30;
}
else
{
/* Record the split variable and add decsplit to that variable's
   running total in tgini. */
bestvar[kbuild] = msplit;
tgini[msplit] = decsplit + tgini[msplit];
if (cat[msplit] == 1)
{
/* Store the entries of a at positions nbest and nbest + 1 of row
   msplit as the two sides of the split point. */
bestsplit[kbuild] = a[msplit + nbest * a_dim1];
bestsplitnext[kbuild] = a[msplit + (nbest + 1) * a_dim1];
}
else
{
/* Otherwise nbest itself is stored as the split and there is no
   "next" value. */
bestsplit[kbuild] = nbest;
bestsplitnext[kbuild] = 0;
}
}
/* movedata reports through ndendl the last position belonging to the
   left child (see the population arithmetic just below). */
zer_array->movedata(&a[a_offset], &ta[1], mdim, nsample, &ndstart, &ndend, &idmove[1], &ncase[1], &msplit, &cat[1],
&nbest, &ndendl);
/* leftnode no.= ncur+1, rightnode no. = ncur+2. */
/* Left child covers ndstart .. ndendl, right child ndendl+1 .. ndend. */
nodepop[ncur + 1] = ndendl - ndstart + 1;
nodepop[ncur + 2] = ndend - ndendl;
nodestart[ncur + 1] = ndstart;
nodestart[ncur + 2] = ndendl + 1;
/* find class populations in both nodes */
/* Left child: add each case's weight to the population of its class. */
for (n = ndstart; n <= ndendl; ++n)
{
/* Both branches are identical; the test on cat[msplit] has no effect. */
if (cat[msplit] > 1)
{
nc = ncase[n];
}
else
{
nc = ncase[n];
}
j = cl[nc];
classpop[j + (ncur + 1) * classpop_dim1] += win[nc];
}
/* Right child: same accumulation over ndendl+1 .. ndend. */
for (n = ndendl + 1; n <= ndend; ++n)
{
/* Both branches are identical here as well. */
if (cat[msplit] > 1)
{
nc = ncase[n];
}
else
{
nc = ncase[n];
}
j = cl[nc];
classpop[j + (ncur + 2) * classpop_dim1] += win[nc];
}
/* check on nodestatus */
/* Children start as splittable (2) and are demoted to terminal (-1)
   when they are small enough or hold a single class. */
nodestatus[ncur + 1] = 2;
nodestatus[ncur + 2] = 2;
if (nodepop[ncur + 1] <= *ndsize)
{
nodestatus[ncur + 1] = -1;
}
if (nodepop[ncur + 2] <= *ndsize)
{
nodestatus[ncur + 2] = -1;
}
/* Total weight in each child. */
popt1 = (float)0.;
popt2 = (float)0.;
for (j = 1; j <= *nclass; ++j)
{
popt1 += classpop[j + (ncur + 1) * classpop_dim1];
popt2 += classpop[j + (ncur + 2) * classpop_dim1];
}
/* A child whose whole weight sits in one class is terminal. The
   comparison is an exact float equality. */
for (j = 1; j <= *nclass; ++j)
{
if (classpop[j + (ncur + 1) * classpop_dim1] == popt1)
{
nodestatus[ncur + 1] = -1;
}
if (classpop[j + (ncur + 2) * classpop_dim1] == popt2)
{
nodestatus[ncur + 2] = -1;
}
}
/* Link parent and children; (kbuild << 1) is kbuild times the leading
   dimension 2 of treemap. */
treemap[(kbuild << 1) + 1] = ncur + 1;
treemap[(kbuild << 1) + 2] = ncur + 2;
parent[ncur + 1] = kbuild;
parent[ncur + 2] = kbuild;
nodestatus[kbuild] = 1;
ncur += 2;
/* Stop as soon as the highest node number reaches the capacity. */
if (ncur >= *nrnodes)
{
goto L50;
}
L30:;
}
L50:
/* *ndbigtree = *nrnodes minus the slots still marked 0. Nodes left at
   status 2 were never examined and become terminal. */
*ndbigtree = *nrnodes;
for (k = *nrnodes; k >= 1; --k)
{
if (nodestatus[k] == 0)
{
--(*ndbigtree);
}
if (nodestatus[k] == 2)
{
nodestatus[k] = -1;
}
}
/* Label each terminal node with the class of largest population; on a
   tie the lowest class index wins (strict >). nodeclass[kn] is left
   untouched if no class population is positive. */
for (kn = 1; kn <= *ndbigtree; ++kn)
{
if (nodestatus[kn] == -1)
{
pp = (float)0.;
for (j = 1; j <= *nclass; ++j)
{
if (classpop[j + kn * classpop_dim1] > pp)
{
nodeclass[kn] = j;
pp = classpop[j + kn * classpop_dim1];
}
}
}
}
delete zer_array;
return 0;
} /* end buildtree */
int randomForest::findbestsplit(int a[],int b[],int cl[],int *mdim,int *nsample,int *nclass,
int cat[],int *ndstart,int *ndend,float tclasspop[],float *tclasscat,
int *msplit, float *decsplit,int *nbest,int ncase[],int *jstat,
int jin[],int *mtry,int iv[],float win[],float wr[],float wc[],
float wl[],int *mred,int *kbuild)
{
/* System generated locals */
int a_dim1, a_offset, b_dim1, b_offset, tclasscat_dim1, tclasscat_offset;
/* Local variables */
static int lcat;
static float crit;
static int mvar, nbestvar;
static float crit0;
static int i, j, k, l;
static float u;
extern double rrand();
extern int zermr(), zervr();
static int nc, mt;
static float su;
extern int catmax();
static float zz, rld, pdo, rrd, rln, pno;
static int nsp;
static float rrn;
static int nnz;
static float critmax, critvar;
/* compute initial values of numerator and denominator of Gini */
/* Parameter adjustments */
--cat;
--win;
--jin;
--ncase;
--cl;
b_dim1 = *mdim;
b_offset = b_dim1 + 1;
b -= b_offset;
a_dim1 = *mdim;
a_offset = a_dim1 + 1;
a -= a_offset;
--wl;
--wc;
--wr;
tclasscat_dim1 = *nclass;
tclasscat_offset = tclasscat_dim1 + 1;
tclasscat -= tclasscat_offset;
--tclasspop;
--iv;
/* Function Body */
pno = (float)0.;
pdo = (float)0.;
for (j = 1; j <= *nclass; ++j)
{
pno += tclasspop[j] * tclasspop[j];
pdo += tclasspop[j];
}
crit0 = pno / pdo; /* crit0 = sum(tclasspop[j]^2) / sum(tclasspop[j]) e.g crit0 = 524.703 */
*jstat = 0;
/* zz is random number */
int ntest=1;
randomForest *zer_array = new randomForest(1,1);
zer_array->rrand(&ntest,&zz);
/* start main loop through variables to find best split */
critmax = (float)-1e20;
/* mtry is number of classes e.g. "7"*/
for (mt = 1; mt <= *mtry; ++mt)
{
float ret_val=0;
zer_array->rrand(&ntest,&ret_val);
mvar= (int)(*mred * ret_val) + 1;
//mvar = (int) (*mred * rrand(&ntest)) + 1; /* mvar is random class e.g. "3"*/
if (cat[mvar] == 1) {
rrn = pno;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -