📄 randomforest.cpp
字号:
#include "randomForest.h"
#include <stdlib.h>
#include <stdio.h>
#define AS_ZERO 0.0001
#ifdef _WIN32
using namespace std;
#endif
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif
/* Build an r-by-c table: store the dimensions, obtain the backing
   storage through alloc() and then clear it through toZero(). */
randomForest::randomForest(int r, int c)
{
    this->rows = r;
    this->cols = c;
    this->alloc();
    this->toZero();   // every element starts out as zero
}
/* Copy constructor: adopts the dimensions of m, allocates separate
   storage of that size via alloc() and copies each element of m into it. */
randomForest::randomForest(const randomForest& m)
{
    rows = m.rows;
    cols = m.cols;
    alloc();
    if (cols <= 0)
        return;               // rows hold no elements, nothing to copy
    for (int r = 0; r < rows; ++r)
    {
        const T* from = m.element[r];
        T* to = element[r];
        for (int c = 0; c < cols; ++c)
            to[c] = from[c];
    }
}
/* Destructor: returns the element storage through release(). */
randomForest::~randomForest()
{
    this->release();
}
/* Allocate the element table on the heap: one array of `rows` row
   pointers, each pointing at its own array of `cols` values.  The
   values are not assigned here. */
void randomForest::alloc()
{
    element = new T*[rows];
    int r = 0;
    while (r < rows)
    {
        element[r] = new T[cols];
        ++r;
    }
}
/* Free the heap storage obtained by alloc().
   Every row array is deleted, then the array of row pointers, and each
   pointer is reset to 0 afterwards.  When `element` is already 0 (for
   instance because release() ran before) the function returns at once
   instead of dereferencing a null pointer, so repeated calls are safe. */
void randomForest::release(void)
{
    if (element == 0)
        return;
    for (int i = 0; i < rows; i++)
    {
        delete [] element[i];
        element[i] = 0;
    }
    delete [] element;
    element = 0;
}
void randomForest::toZero(void)
{
for (int i = 0; i < rows; i++)
for (int j = 0; j < cols; j++)
element[i][j] = 0;
}
/* zermr: set every entry of an m1-by-m2 float matrix to zero.
   rx      - matrix storage, (*m1) * (*m2) contiguous floats
   m1, m2  - the two dimensions of the matrix
   Always returns 0.
   Exactly (*m1) * (*m2) elements are written, the same range that
   zerm() and zermd() clear; nothing in front of or behind the matrix
   is touched.  Nothing is written when either dimension is <= 0. */
float randomForest::zermr(float rx[], int *m1, int *m2)
{
    const int nrow = *m1;
    const int ncol = *m2;
    for (int col = 0; col < ncol; ++col)
    {
        for (int row = 0; row < nrow; ++row)
        {
            rx[row + col * nrow] = (float)0.;
        }
    }
    return (0);
} /* end zermr */
/* zerm: clear an m1-by-m2 int matrix.
   mx      - matrix storage, (*m1) * (*m2) contiguous ints
   m1, m2  - the two dimensions of the matrix
   Always returns 0.  Nothing is written when either dimension is <= 0. */
int randomForest::zerm(int mx[], int *m1, int *m2)
{
    const int nrow = *m1;
    const int ncol = *m2;
    /* zero-based walk over the same elements the 1-based form addresses */
    for (int col = 0; col < ncol; ++col)
    {
        for (int row = 0; row < nrow; ++row)
        {
            mx[row + col * nrow] = 0;
        }
    }
    return 0;
} /* end zerm */
/* zerv: set the first *m1 entries of the int vector ix to 0.
   Always returns 0. */
int randomForest::zerv(int ix[], int *m1)
{
    const int count = *m1;
    int k = 0;
    while (k < count)
    {
        ix[k] = 0;
        ++k;
    }
    return 0;
} /* end zerv */
/* zerv_1: set the first *m1 entries of the int vector ix to 1.
   Always returns 0. */
int randomForest::zerv_1(int ix[], int *m1)
{
    const int count = *m1;
    int k = 0;
    while (k < count)
    {
        ix[k] = 1;
        ++k;
    }
    return 0;
} /* end zerv_1 */
/* zervr: set the first *m1 entries of the float vector rx to 0.
   Always returns 0. */
float randomForest::zervr(float rx[], int *m1)
{
    const int count = *m1;
    int k = 0;
    while (k < count)
    {
        rx[k] = (float)0.;
        ++k;
    }
    return 0;
} /* end zervr */
/* zermd: clear an m1-by-m2 double matrix.
   dx      - matrix storage, (*m1) * (*m2) contiguous doubles
   m1, m2  - the two dimensions of the matrix
   Always returns 0.  Nothing is written when either dimension is <= 0. */
double randomForest::zermd(double dx[],int *m1,int *m2)
{
    const int nrow = *m1;
    const int ncol = *m2;
    /* zero-based walk over the same elements the 1-based form addresses */
    for (int col = 0; col < ncol; ++col)
    {
        for (int row = 0; row < nrow; ++row)
        {
            dx[row + col * nrow] = 0.;
        }
    }
    return 0;
} /* end zermd */
/* Knearestneighborfix: replace missing entries by the mean of nearby samples.
   x        - data matrix, modified in place; the value of variable m
              (1..*mdim) for sample n (1..*nsample) is stored at
              x[(m-1) + (n-1) * *mdim]
   ncase    - not used by this routine
   mdim     - number of variables
   nsample  - number of samples
   cat      - per-variable flag; only variables with cat[m] == 1 are handled.
              NOTE(review): read as cat[1] .. cat[*mdim] without the 1-based
              pointer shift that roughfix() applies - confirm the caller's layout.
   code     - value that marks an entry as missing
   Always returns 0.

   For every entry equal to *code, values different from *code are summed
   while scanning forward from sample n+1 and backward from sample n-1
   (at most 25 values per direction); the entry is then replaced by
   sum / count.  The sums are kept in ints, so each addition truncates and
   the division is an integer division.  A replaced entry is visible to the
   scans of later samples.
   NOTE(review): the forward scan requires j < *nsample, so sample *nsample
   itself never contributes - confirm whether that is intended.

   The index range is tested before x is read, so no element outside
   samples 1..*nsample is accessed, and an entry for which no neighbour was
   found is left unchanged instead of dividing by zero. */
int randomForest::Knearestneighborfix(float x[],int *ncase,int *mdim,int *nsample,int *cat,
float *code)
{
    const int x_dim1 = *mdim;
    /* Parameter adjustment: lets x be addressed with 1-based (m, n) */
    x -= x_dim1 + 1;
    for (int m = 1; m <= *mdim; ++m)
    {
        if (cat[m] != 1)
        {
            continue;
        }
        for (int n = 1; n <= *nsample; ++n)
        {
            if (x[m + n * x_dim1] != *code)
            {
                continue;
            }
            int NearestNeighborDown = 0;
            int NearestNeighborUp = 0;
            int nt_down = 0;
            int nt_up = 0;
            /* samples after n */
            for (int j = n + 1; j <= n + 25; ++j)
            {
                while ((j > 0) && (j < *nsample) && (nt_down < 25) && (x[m + j * x_dim1] != *code))
                {
                    NearestNeighborDown = (int)(NearestNeighborDown + x[m + j * x_dim1]);
                    ++nt_down;
                    ++j;
                }
            }
            /* samples before n */
            for (int k = n - 1; k >= n - 25; --k)
            {
                while ((k > 0) && (k < *nsample) && (nt_up < 25) && (x[m + k * x_dim1] != *code))
                {
                    NearestNeighborUp = (int)(NearestNeighborUp + x[m + k * x_dim1]);
                    ++nt_up;
                    --k;
                }
            }
            const int nt = nt_up + nt_down;
            if (nt > 0)
            {
                const int NearestNeighbor = (NearestNeighborDown + NearestNeighborUp) / nt;
                x[m + n * x_dim1] = (float)NearestNeighbor;
            }
        }
    }
    return(0);
} /* end Knearestneighborfix */
/* Ignorefix: drop missing entries by packing the remaining values to the
   front of each variable.
   x        - data matrix, modified in place; the value of variable m
              (1..*mdim) for sample n (1..*nsample) is stored at
              x[(m-1) + (n-1) * *mdim]
   v        - scratch vector that receives the kept values of the variable
              being processed (up to *nsample entries)
   ncase    - not used by this routine
   mdim     - number of variables
   nsample  - number of samples
   cat      - per-variable flag; only variables with cat[m] == 1 are handled
   code     - value that marks an entry as missing
   ncolumn  - output: ncolumn[m] receives the number of kept values of variable m
   NOTE(review): cat and ncolumn are indexed 1 .. *mdim without a 1-based
   pointer shift - confirm the caller's layout.
   Always returns 0.

   All kept values v[1] .. v[nt] are written back to samples 1 .. nt, so the
   first ncolumn[m] samples of the variable hold exactly the kept values. */
int randomForest::Ignorefix(float x[],float v[],int *ncase,int *mdim,int *nsample,int *cat,
float *code, int *ncolumn)
{
    const int x_dim1 = *mdim;
    /* Parameter adjustments: 1-based access to v and x */
    --v;
    x -= x_dim1 + 1;
    for (int m = 1; m <= *mdim; ++m)
    {
        if (cat[m] != 1)
        {
            continue;
        }
        /* collect the values that are not flagged as missing */
        int nt = 0;
        for (int n = 1; n <= *nsample; ++n)
        {
            if (x[m + n * x_dim1] != *code)
            {
                ++nt;
                v[nt] = x[m + n * x_dim1];
            }
        }
        /* write them back contiguously, including the last one */
        for (int k = 1; k <= nt; ++k)
        {
            x[m + k * x_dim1] = v[k];
        }
        ncolumn[m] = nt;
    }
    return(0);
} /* end Ignorefix */
/* roughfix: fill missing entries of the training and test data.
   x        - training matrix, modified in place; variable m (1..*mdim) of
              sample n (1..*nsample) is stored at x[(m-1) + (n-1) * *mdim]
   v        - scratch vector for the non-missing values of one variable
   ncase    - int array handed to quicksort() as its permutation vector
   mdim     - number of variables
   nsample  - number of training samples
   xts      - test matrix with the same layout as x, modified in place
   ntest    - number of test samples; xts is only touched when *ntest > 1
   cat      - per-variable type: 1 selects the median fill, a value > 1
              selects the most-frequent-category fill with that many categories
   code     - value that marks an entry as missing
   nrcat    - scratch counters, *maxcat entries
   maxcat   - number of entries of nrcat
   fill     - output: the replacement value chosen for each variable
   Always returns 0.

   cat[m] == 1: the non-missing values are sorted over their whole range
   1..nt and the element at position (nt + 1) / 2 is used - the median for an
   odd count, the lower of the two middle values for an even count.  With no
   non-missing value the replacement is 0 and fill[m] is not written.
   cat[m] > 1: the category (value truncated to int) that occurs most often
   among the non-missing entries is used; values outside 1..*maxcat are not
   counted because nrcat has no slot for them. */
int randomForest::roughfix(float x[],float v[],int *ncase,int *mdim,int *nsample,float xts[],
int *ntest,int *cat,float *code,int *nrcat,int *maxcat,float *fill)
{
    const int x_dim1 = *mdim;
    const int xts_dim1 = *mdim;
    const int first = 1;              /* sort range starts at element 1 */
    randomForest zer_array(1,1);      /* helper object used to reach quicksort()/zerv() */
    /* Parameter adjustments: 1-based access to all arrays */
    --fill;
    --cat;
    --ncase;
    --v;
    --nrcat;
    x -= x_dim1 + 1;
    xts -= xts_dim1 + 1;
    /* Function Body */
    for (int m = 1; m <= *mdim; ++m) {
        if (cat[m] == 1) {
            int nt = 0;
            for (int n = 1; n <= *nsample; ++n) {
                if (x[m + n * x_dim1] != *code) {
                    ++nt;
                    v[nt] = x[m + n * x_dim1];
                }
            }
            float rmed = (float)0.;
            if (nt > 0) {
                zer_array.quicksort(&v[1], &ncase[1], &first, &nt, nsample);
                rmed = v[(nt + 1) / 2];
                fill[m] = rmed;
            }
            for (int n = 1; n <= *nsample; ++n) {
                if (x[m + n * x_dim1] == *code) {
                    x[m + n * x_dim1] = rmed;
                }
            }
            if (*ntest > 1) {
                for (int n = 1; n <= *ntest; ++n) {
                    if (xts[m + n * xts_dim1] == *code) {
                        xts[m + n * xts_dim1] = rmed;
                    }
                }
            }
        }
        if (cat[m] > 1) {
            const int lcat = cat[m];
            zer_array.zerv(&nrcat[1], maxcat);
            /* count how often each category occurs */
            for (int n = 1; n <= *nsample; ++n) {
                if (x[m + n * x_dim1] != *code) {
                    const int j = (int)(x[m + n * x_dim1]);
                    if (j >= 1 && j <= *maxcat) {
                        ++nrcat[j];
                    }
                }
            }
            /* pick the most frequent one (lowest index wins ties) */
            int nmax = 0;
            int jmax = 1;
            for (int j = 1; j <= lcat; ++j) {
                if (nrcat[j] > nmax) {
                    nmax = nrcat[j];
                    jmax = j;
                }
            }
            fill[m] = (float) jmax;
            for (int n = 1; n <= *nsample; ++n) {
                if (x[m + n * x_dim1] == *code) {
                    x[m + n * x_dim1] = (float) jmax;
                }
            }
            if (*ntest > 1) {
                for (int n = 1; n <= *ntest; ++n) {
                    if (xts[m + n * xts_dim1] == *code) {
                        xts[m + n * xts_dim1] = fill[m];
                    }
                }
            }
        }
    }
    return(0);
} /* end roughfix */
/* prep: derive class counts, class priors and per-sample weights.
   cl       - class label of each sample, values used as 1-based class index
   nsample  - number of samples
   nclass   - number of classes
   ipi      - when *ipi == 0 the priors pi are computed from the class
              frequencies; otherwise the values already in pi are used
   pi       - class priors (input and/or output); normalised to sum to 1
   pid      - output: pi[j] * *nsample / nc[j] per class, 0 for a class
              that has no sample
   nc       - output: number of samples per class
   wtt      - output: weight of each sample, i.e. pid of its class
   Always returns 0.

   The sample weights are assigned once, after every pid entry has been
   computed, so no pid value is read before it has been set. */
int randomForest::prep(int cl[],int *nsample,int *nclass,int *ipi,float pi[],float pid[],int nc[],float wtt[])
{
    /* Parameter adjustments: 1-based access to all arrays */
    --wtt;
    --cl;
    --nc;
    --pid;
    --pi;
    /* Function Body */
    randomForest zer_array(1,1);      /* helper object used to reach zerv() */
    zer_array.zerv(&nc[1], nclass);
    /* count how many times each class appears in the class column */
    for (int n = 1; n <= *nsample; ++n)
    {
        ++nc[cl[n]];
    }
    /* class frequency = class count divided by the number of samples */
    if (*ipi == 0)
    {
        for (int j = 1; j <= *nclass; ++j)
        {
            pi[j] = (float) nc[j] / *nsample;
        }
    }
    /* normalise the priors by their sum */
    float sp = (float)0.;
    for (int j = 1; j <= *nclass; ++j)
    {
        sp += pi[j];
    }
    for (int j = 1; j <= *nclass; ++j)
    {
        pi[j] /= sp;
    }
    /* ratio of prior to observed frequency; 0 for a class without samples */
    for (int j = 1; j <= *nclass; ++j)
    {
        if (nc[j] >= 1)
        {
            pid[j] = pi[j] * *nsample / nc[j];
        }
        else
        {
            pid[j] = (float)0.;
        }
    }
    /* every sample gets the ratio of its class as weight */
    if (*nclass >= 1)
    {
        for (int n = 1; n <= *nsample; ++n)
        {
            wtt[n] = pid[cl[n]];
        }
    }
    return(0);
} /* end prep */
/* makea: build the sort-order table a and the rank table b per variable.
   x_train  - training matrix; variable m (1..*mdim) of sample n is stored
              at x_train[(m-1) + (n-1) * *mdim]
   mdim     - number of variables
   nsample  - number of samples; temporarily overwritten while a variable
              is processed and restored before returning
   cat      - per-variable type flags; all *mdim entries are set to 1 here
   isort    - scratch permutation vector; left as 1, 2, ..., *nsample
   v        - scratch vector for the values of one variable
   a        - output, same layout as x_train: a(mvar, n) is the original
              sample number of the n-th smallest value of variable mvar
   b        - output, same layout as x_train: b(mvar, sample) is the rank of
              that sample's value, starting at 1; equal values share a rank
   mred     - number of variables to process (1 .. *mred)
   ncolumn  - per-variable sample count; when ncolumn[mvar] lies in
              1 .. *nsample only that many leading samples are used,
              otherwise all samples are used.
              NOTE(review): indexed 1 .. *mred without a 1-based pointer
              shift, matching how Ignorefix() writes it - confirm.
   Always returns 0.

   The sample count is chosen afresh for every variable, so a reduced count
   of one variable never carries over to the next one. */
float randomForest::makea(float x_train[],int *mdim,int *nsample,int cat[],int isort[],float v[],
int a[],int b[],int *mred,int *ncolumn)
{
    const int nsample_orig = *nsample;
    const int x_dim1 = *mdim;
    const int a_dim1 = *mdim;
    const int b_dim1 = *mdim;
    const int first = 1;              /* sort range starts at element 1 */
    randomForest zer_array(1,1);      /* helper object used to reach quicksort() */
    /* Parameter adjustments: 1-based access to the arrays */
    --cat;
    --v;
    --isort;
    b -= b_dim1 + 1;
    a -= a_dim1 + 1;
    x_train -= x_dim1 + 1;
    /* flag every variable with type 1 (1-based, matching the shift above) */
    for (int m = 1; m <= *mdim; ++m)
    {
        cat[m] = 1;
    }
    /* Function Body */
    for (int mvar = 1; mvar <= *mred; ++mvar)
    {
        if (cat[mvar] != 1)
        {
            continue;
        }
        /* number of samples to use for this variable */
        if (ncolumn[mvar] > 0 && ncolumn[mvar] <= nsample_orig)
        {
            *nsample = ncolumn[mvar];
        }
        else
        {
            *nsample = nsample_orig;
        }
        /* v[n] takes the values of this variable, isort[n] the sample numbers */
        for (int n = 1; n <= *nsample; ++n)
        {
            v[n] = x_train[mvar + n * x_dim1];
            isort[n] = n;
        }
        /* sort v into ascending order; isort[n] then holds the sample number
           of the n-th smallest value (sample numbers start at 1) */
        zer_array.quicksort(&v[1], &isort[1], &first, nsample, nsample);
        for (int n = 1; n <= (*nsample - 1); ++n)
        {
            /* n1, n2: sample numbers of two neighbours in sorted order */
            const int n1 = isort[n];
            const int n2 = isort[n + 1];
            a[mvar + n * a_dim1] = n1;
            /* the smallest value gets rank 1 */
            if (n == 1)
            {
                b[mvar + n1 * b_dim1] = 1;
            }
            /* a strictly larger value gets the next rank, an equal value the same rank */
            if (v[n] < v[n + 1])
            {
                b[mvar + n2 * b_dim1] = b[mvar + n1 * b_dim1] + 1;
            }
            else
            {
                b[mvar + n2 * b_dim1] = b[mvar + n1 * b_dim1];
            }
        }
        /* last position of the sort order */
        a[mvar + *nsample * a_dim1] = isort[*nsample];
    }
    *nsample = nsample_orig;
    for (int n = 1; n <= *nsample; ++n)
    {
        isort[n] = n;
    }
    return(0);
} /* end makea */
int randomForest::quicksort(float v[],int iperm[],const int *ii,int *jj,int *kk)
{
static int i, j, k, l, m, t, ij, il[32], iu[32], tt;
static float vt, vtt;
/************************************************************************/
/* puts into iperm the permutation vector which sorts v into */
/* increasing order. only elementest from ii to jj are considered. */
/* array iu(k) and array il(k) permit sorting up to 2**(k+1)-1 elements */
/* this is a modification of acm algorithm #347 by r. c. singleton, */
/* which is a modified hoare quicksort. */
/************************************************************************/
/* Parameter adjustments */
--iperm;
--v;
/* Function Body */
m = 1;
i = *ii;
j = *jj;
L10:
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -