📄 classtree.c
字号:
leftNum += u * (2 * wl[k-1] + u); rightNum += u * (-2 * wr[k-1] + u); leftDen += u; rightDen -= u; wl[k-1] += u; wr[k-1] -= u; if (b[mvar, nc] < b[mvar, a[mvar, j]]) { if (fmin2(rightDen, leftDen) > 1.0e-5) { crit = (leftNum / leftDen) + (rightNum / rightDen); if (crit > critmax) { *bestSplit = j; critmax = crit; *splitVar = mvar; } /* Break ties at random: */ if (crit == critmax) { ntie++; if (unif_rand() > 1.0 / ntie) { *bestSplit = j; critmax = crit; *splitVar = mvar; } } } } } } else { /* Split on a categorical predictor. */ zeroDouble(classCatTable, nClass * 32); for (j = ndstart; j <= ndend; ++j) { nc = ncase[j-1]; l = a[mvar, ncase[j-1]]; classCatTable[class[nc-1], l-1] += weight[nc-1]; } nNotEmpty = 0; for (j = 0; j < lcat; ++j) { catSum = 0; for (k = 0; k < nClass; ++k) { catSum += classCatTable[k, j]; } catCount[j] = su; if (catSum > 0) nNotEmpty ++; } nhit = 0; if (nNotEmpty > 1) { F77_CALL(catmax)(parentden, classcatTable, classCount, &nclass, &lcat, bestSplit, &critmax, &nhit, &maxcat, &ncmax, &ncsplit); } if (nhit) *splitVar = mvar; } } if (critmax < -1.0e10 || msplit == 0) { *splitStatus = -1; } else { *decsplit = critmax - crit0; }}#endif /* C_CLASSTREE */void F77_NAME(catmax)(double *parentDen, double *tclasscat, double *tclasspop, int *nclass, int *lcat, int *ncatsp, double *critmax, int *nhit, int *maxcat, int *ncmax, int *ncsplit) {/* This finds the best split of a categorical variable with lcat categories and nclass classes, where tclasscat(j, k) is the number of cases in class j with category value k. The method uses an exhaustive search over all partitions of the category values if the number of categories is 10 or fewer. Otherwise ncsplit randomly selected splits are tested and best used. */ int j, k, n, icat[32], nsplit; double leftNum, leftDen, rightNum, decGini, *leftCatClassCount; leftCatClassCount = (double *) Calloc(*nclass, double); *nhit = 0; nsplit = *lcat > *ncmax ? *ncsplit : (int) pow(2.0, (double) *lcat - 1) - 1; for (n = 0; n < nsplit; ++n) { zeroInt(icat, 32); if (*lcat > *ncmax) { /* Generate random split. TODO: consider changing to generating random bits with more efficient algorithm */ for (j = 0; j < *lcat; ++j) icat[j] = unif_rand() > 0.5 ? 1 : 0; } else { unpack(n + 1, icat); } for (j = 0; j < *nclass; ++j) { leftCatClassCount[j] = 0; for (k = 0; k < *lcat; ++k) { if (icat[k]) { leftCatClassCount[j] += tclasscat[j + k * *nclass]; } } } leftNum = 0.0; leftDen = 0.0; for (j = 0; j < *nclass; ++j) { leftNum += leftCatClassCount[j] * leftCatClassCount[j]; leftDen += leftCatClassCount[j]; } /* If either node is empty, try another split. */ if (leftDen <= 1.0e-8 || *parentDen - leftDen <= 1.0e-5) continue; rightNum = 0.0; for (j = 0; j < *nclass; ++j) { leftCatClassCount[j] = tclasspop[j] - leftCatClassCount[j]; rightNum += leftCatClassCount[j] * leftCatClassCount[j]; } decGini = (leftNum / leftDen) + (rightNum / (*parentDen - leftDen)); if (decGini > *critmax) { *critmax = decGini; *nhit = 1; *ncatsp = *lcat > *ncmax ? pack(*lcat, icat) : n + 1; } } Free(leftCatClassCount);}/* Find best split of with categorical variable when there are two classes */void F77_NAME(catmaxb)(double *totalWt, double *tclasscat, double *classCount, int *nclass, int *nCat, int *nbest, double *critmax, int *nhit, double *catCount) { double catProportion[32], cp[32], cm[32]; int kcat[32]; int i, j; double bestsplit=0.0, rightDen, leftDen, leftNum, rightNum, crit; *nhit = 0; for (i = 0; i < *nCat; ++i) { catProportion[i] = catCount[i] ? tclasscat[i * *nclass] / catCount[i] : 0.0; kcat[i] = i + 1; } R_qsort_I(catProportion, kcat, 1, *nCat); for (i = 0; i < *nclass; ++i) { cp[i] = 0; cm[i] = classCount[i]; } rightDen = *totalWt; leftDen = 0.0; for (i = 0; i < *nCat - 1; ++i) { leftDen += catCount[kcat[i]-1]; rightDen -= catCount[kcat[i]-1]; leftNum = 0.0; rightNum = 0.0; for (j = 0; j < *nclass; ++j) { cp[j] += tclasscat[j + (kcat[i]-1) * *nclass]; cm[j] -= tclasscat[j + (kcat[i]-1) * *nclass]; leftNum += cp[j] * cp[j]; rightNum += cm[j] * cm[j]; } if (catProportion[i] < catProportion[i + 1]) { /* If neither node is empty, check the split. */ if (rightDen > 1.0e-5 && leftDen > 1.0e-5) { crit = (leftNum / leftDen) + (rightNum / rightDen); if (crit > *critmax) { *critmax = crit; bestsplit = .5 * (catProportion[i] + catProportion[i + 1]); *nhit = 1; } } } } if (*nhit == 1) { zeroInt(kcat, *nCat); for (i = 0; i < *nCat; ++i) { catProportion[i] = catCount[i] ? tclasscat[i * *nclass] / catCount[i] : 0.0; kcat[i] = catProportion[i] < bestsplit ? 1 : 0; } *nbest = pack(*nCat, kcat); }}void predictClassTree(double *x, int n, int mdim, int *treemap, int *nodestatus, double *xbestsplit, int *bestvar, int *nodeclass, int treeSize, int *cat, int nclass, int *jts, int *nodex, int maxcat) { int m, npack, i, j, k, *cbestsplit; /* decode the categorical splits */ if (maxcat > 1) { cbestsplit = (int *) Calloc(maxcat * treeSize, int); zeroInt(cbestsplit, maxcat * treeSize); for (i = 0; i < treeSize; ++i) { if (nodestatus[i] != NODE_TERMINAL) { if (cat[bestvar[i] - 1] > 1) { npack = (int) xbestsplit[i]; /* unpack `npack' into bits */ for (j = 0; npack; npack >>= 1, ++j) { cbestsplit[j + i*maxcat] = npack & 01; } } } } } for (i = 0; i < n; ++i) { k = 0; while (nodestatus[k] != NODE_TERMINAL) { m = bestvar[k] - 1; if (cat[m] == 1) { /* Split by a numerical predictor */ k = (x[m + i * mdim] <= xbestsplit[k]) ? treemap[k * 2] - 1 : treemap[1 + k * 2] - 1; } else { /* Split by a categorical predictor */ k = cbestsplit[(int) x[m + i * mdim] - 1 + k * maxcat] ? treemap[k * 2] - 1 : treemap[1 + k * 2] - 1; } } /* Terminal node: assign class label */ jts[i] = nodeclass[k]; nodex[i] = k + 1; } if (maxcat > 1) Free(cbestsplit);}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -