⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 classtree.c

📁 是基于linux系统的C++程序
💻 C
📖 第 1 页 / 共 2 页
字号:
                leftNum += u * (2 * wl[k-1] + u);                rightNum += u * (-2 * wr[k-1] + u);                leftDen += u;                rightDen -= u;                wl[k-1] += u;                wr[k-1] -= u;                if (b[mvar, nc] < b[mvar, a[mvar, j]]) {                    if (fmin2(rightDen, leftDen) > 1.0e-5) {                        crit = (leftNum / leftDen) + (rightNum / rightDen);                        if (crit > critmax) {                            *bestSplit = j;                            critmax = crit;                            *splitVar = mvar;                        }                        /* Break ties at random: */                        if (crit == critmax) {			    ntie++;			    if (unif_rand() > 1.0 / ntie) {				*bestSplit = j;				critmax = crit;				*splitVar = mvar;			    }                        }                    }                }            }        } else {            /* Split on a categorical predictor. */            zeroDouble(classCatTable, nClass * 32);            for (j = ndstart; j <= ndend; ++j) {                nc = ncase[j-1];                l = a[mvar, ncase[j-1]];                classCatTable[class[nc-1], l-1] += weight[nc-1];            }            nNotEmpty = 0;            for (j = 0; j < lcat; ++j) {                catSum = 0;                for (k = 0; k < nClass; ++k) {                    catSum += classCatTable[k, j];                }                catCount[j] = su;                if (catSum > 0) nNotEmpty ++;            }            nhit = 0;            if (nNotEmpty > 1) {                F77_CALL(catmax)(parentden, classcatTable, classCount,                                  &nclass, &lcat, bestSplit, &critmax, &nhit,                                  &maxcat, &ncmax, &ncsplit);            }            if (nhit) *splitVar = mvar;        }    }    if (critmax < -1.0e10 || msplit == 0) {        *splitStatus = -1;    } else {        *decsplit = critmax - crit0;    }}#endif /* C_CLASSTREE */void F77_NAME(catmax)(double *parentDen, double *tclasscat,                       double *tclasspop, int *nclass, int *lcat,                       int *ncatsp, double *critmax, int *nhit,                       int *maxcat, int *ncmax, int *ncsplit) {/* This finds the best split of a categorical variable with lcat    categories and nclass classes, where tclasscat(j, k) is the number    of cases in class j with category value k. The method uses an    exhaustive search over all partitions of the category values if the    number of categories is 10 or fewer.  Otherwise ncsplit randomly    selected splits are tested and best used. */    int j, k, n, icat[32], nsplit;    double leftNum, leftDen, rightNum, decGini, *leftCatClassCount;        leftCatClassCount = (double *) Calloc(*nclass, double);    *nhit = 0;    nsplit = *lcat > *ncmax ?         *ncsplit : (int) pow(2.0, (double) *lcat - 1) - 1;    for (n = 0; n < nsplit; ++n) {        zeroInt(icat, 32);        if (*lcat > *ncmax) {            /* Generate random split.               TODO: consider changing to generating random bits with more               efficient algorithm */            for (j = 0; j < *lcat; ++j) icat[j] = unif_rand() > 0.5 ? 1 : 0;        } else {            unpack(n + 1, icat);        }        for (j = 0; j < *nclass; ++j) {            leftCatClassCount[j] = 0;            for (k = 0; k < *lcat; ++k) {                if (icat[k]) {                    leftCatClassCount[j] += tclasscat[j + k * *nclass];                }            }        }        leftNum = 0.0;        leftDen = 0.0;        for (j = 0; j < *nclass; ++j) {            leftNum += leftCatClassCount[j] * leftCatClassCount[j];            leftDen += leftCatClassCount[j];        }        /* If either node is empty, try another split. */        if (leftDen <= 1.0e-8 || *parentDen - leftDen <= 1.0e-5) continue;        rightNum = 0.0;        for (j = 0; j < *nclass; ++j) {            leftCatClassCount[j] = tclasspop[j] - leftCatClassCount[j];            rightNum += leftCatClassCount[j] * leftCatClassCount[j];        }        decGini = (leftNum / leftDen) + (rightNum / (*parentDen - leftDen));        if (decGini > *critmax) {            *critmax = decGini;            *nhit = 1;            *ncatsp = *lcat > *ncmax ? pack(*lcat, icat) : n + 1;        }    }    Free(leftCatClassCount);}/* Find best split of with categorical variable when there are two classes */void F77_NAME(catmaxb)(double *totalWt, double *tclasscat, double *classCount,                       int *nclass, int *nCat, int *nbest, double *critmax,                        int *nhit, double *catCount) {    double catProportion[32], cp[32], cm[32];    int kcat[32];    int i, j;    double bestsplit=0.0, rightDen, leftDen, leftNum, rightNum, crit;    *nhit = 0;    for (i = 0; i < *nCat; ++i) {        catProportion[i] = catCount[i] ?             tclasscat[i * *nclass] / catCount[i] : 0.0;        kcat[i] = i + 1;    }    R_qsort_I(catProportion, kcat, 1, *nCat);    for (i = 0; i < *nclass; ++i) {        cp[i] = 0;        cm[i] = classCount[i];    }    rightDen = *totalWt;    leftDen = 0.0;    for (i = 0; i < *nCat - 1; ++i) {        leftDen += catCount[kcat[i]-1];        rightDen -= catCount[kcat[i]-1];        leftNum = 0.0;        rightNum = 0.0;        for (j = 0; j < *nclass; ++j) {            cp[j] += tclasscat[j + (kcat[i]-1) * *nclass];            cm[j] -= tclasscat[j + (kcat[i]-1) * *nclass];            leftNum += cp[j] * cp[j];            rightNum += cm[j] * cm[j];        }        if (catProportion[i] < catProportion[i + 1]) {            /* If neither node is empty, check the split. */            if (rightDen > 1.0e-5 && leftDen > 1.0e-5) {                crit = (leftNum / leftDen) + (rightNum / rightDen);                if (crit > *critmax) {                    *critmax = crit;                    bestsplit = .5 * (catProportion[i] + catProportion[i + 1]);                    *nhit = 1;                }            }        }    }    if (*nhit == 1) {        zeroInt(kcat, *nCat);        for (i = 0; i < *nCat; ++i) {            catProportion[i] = catCount[i] ?                 tclasscat[i * *nclass] / catCount[i] : 0.0;            kcat[i] = catProportion[i] < bestsplit ? 1 : 0;        }        *nbest = pack(*nCat, kcat);    }}void predictClassTree(double *x, int n, int mdim, int *treemap,		      int *nodestatus, double *xbestsplit,		      int *bestvar, int *nodeclass,		      int treeSize, int *cat, int nclass,		      int *jts, int *nodex, int maxcat) {    int m, npack, i, j, k, *cbestsplit;    /* decode the categorical splits */    if (maxcat > 1) {        cbestsplit = (int *) Calloc(maxcat * treeSize, int);        zeroInt(cbestsplit, maxcat * treeSize);        for (i = 0; i < treeSize; ++i) {            if (nodestatus[i] != NODE_TERMINAL) {                if (cat[bestvar[i] - 1] > 1) {                    npack = (int) xbestsplit[i];                    /* unpack `npack' into bits */                    for (j = 0; npack; npack >>= 1, ++j) {                        cbestsplit[j + i*maxcat] = npack & 01;                    }                }            }        }    }    for (i = 0; i < n; ++i) {	k = 0;	while (nodestatus[k] != NODE_TERMINAL) {            m = bestvar[k] - 1;            if (cat[m] == 1) {  	        /* Split by a numerical predictor */	        k = (x[m + i * mdim] <= xbestsplit[k]) ?		    treemap[k * 2] - 1 : treemap[1 + k * 2] - 1;	    } else {	        /* Split by a categorical predictor */	        k = cbestsplit[(int) x[m + i * mdim] - 1 + k * maxcat] ?		    treemap[k * 2] - 1 : treemap[1 + k * 2] - 1;	    }	}	/* Terminal node: assign class label */	jts[i] = nodeclass[k];	nodex[i] = k + 1;    }    if (maxcat > 1) Free(cbestsplit);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -