📄 regress.c
字号:
/*---------------------------------------------------------------------- File : regress.c Contents: multivariate polynomial regression management Author : Christian Borgelt History : 2001.10.13 file created 2001.10.15 first version completed 2001.10.19 bug in function reg_desc removed 2001.10.20 computation of the sum of squared errors added 2001.10.26 function reg_sse added, reg_calc modified 2007.04.04 extended regression functions added 2007.04.12 functions reg_parse and reg_parsex added 2007.05.30 linear regression treated separately in _prods----------------------------------------------------------------------*/#include <stdlib.h>#include <string.h>#include <limits.h>#include <math.h>#include <assert.h>#include "regress.h"/*---------------------------------------------------------------------- Preprocessor Definitions----------------------------------------------------------------------*/#define REG_MAX ((int)sqrt((double)(INT_MAX/4)/sizeof(double)))/*---------------------------------------------------------------------- Auxiliary Functions----------------------------------------------------------------------*/static int _count (REGRESS *reg){ /* --- compute number of coefficients */ int i, k; /* loop variables */ assert(reg && reg->tab); /* check the function arguments */ if ((reg->dim <= 2) /* if single variable regression */ || (reg->deg <= 2)) { /* or linear/quadratic regression */ if (reg->deg <= 1) return reg->dim; if (reg->dim <= 2) return reg->deg +1; return (reg->dim *(reg->dim +1)) >> 1; } /* compute the number directly */ for (k = reg->dim; --k >= 0;) /* initialize table element n-1 to */ reg->tab[k] = k+1; /* n choose 1 = n (sizes for deg = 1) */ for (i = reg->deg-1; --i >= 0; ) { /* compute the number as */ for (k = 0; ++k < reg->dim; ) { /* (dim +deg -1) choose deg */ if (reg->tab[k] > INT_MAX -reg->tab[k-1]) return -1; /* check the number of coefficients */ reg->tab[k] += reg->tab[k-1]; } /* compute next binomial coefficient */ } /* by dynamic programming */ k = reg->tab[reg->dim-1]; /* finally get the number of params. */ return (k > REG_MAX) ? -1 : k;/* from the last table field */} /* _count() *//*--------------------------------------------------------------------*/static void _prods (REGRESS *reg, double *vec){ /* --- compute products of variables */ int i, k, m; /* loop variables */ int *s; /* table of section sizes */ double *b, *p; /* to traverse the buffer */ assert(reg); /* check the function argument */ b = reg->buf +reg->cnt -1; /* traverse the buffer */ if (reg->deg <= 1) { /* if linear regresseion */ for (i = reg->dim-1; --i >= 0; ) *--b = vec[i]; /* simply copy the data vector */ return; /* to the internal buffer and */ } /* then abort the function */ for (s = reg->tab +(i = reg->dim-1); --i >= 0; ) { *--b = vec[i]; /* copy the linear terms to the end */ *--s = i+1; /* and set the initial section sizes */ } /* (elements to combine per variable) */ for (k = 2; 1; ) { /* traverse the exponent sums */ p = b; /* note the previous section */ for (i = reg->dim-1; --i >= 0; ) for (m = s[i]; --m >= 0;) /* compute next section (next exp.) */ *--b = p[m] *vec[i]; /* from the previous one */ if (++k > reg->deg) break; /* if highest degree reached, abort */ for (i = 0; ++i < reg->dim; ) s[i] += s[i-1]; /* compute next section sizes */ } /* (dynamic programming) */} /* _prods() *//*---------------------------------------------------------------------- Main Functions----------------------------------------------------------------------*/REGRESS* reg_create (int dim, int deg){ /* --- create a regression object */ REGRESS *reg; /* created regression object */ assert((dim > 1) && (deg > 0)); /* check the function arguments */ reg = (REGRESS*)malloc(sizeof(REGRESS)); if (!reg) return NULL; /* create the base structure */ reg->dim = dim; /* note the number of variables */ reg->deg = deg; /* and the degree of the polynomial */ reg->sse = reg->max = -1; /* invalidate sum of squared errors */ reg->tab = (int*)malloc(dim *sizeof(int)); if (!reg->tab) { free(reg); return NULL; } reg->cnt = _count(reg); /* compute the number of parameters */ if (reg->cnt < 0) { free(reg->tab); free(reg); return NULL; } reg->vec = (double*)malloc((3*reg->cnt +dim) *sizeof(double)); if (!reg->vec) { free(reg->tab); free(reg); return NULL; } reg->buf = reg->vec +dim; /* organize the memory into buffers */ reg->rhs = reg->buf +reg->cnt;/* for data and power products */ reg->cfs = reg->rhs +reg->cnt;/* and a right hand side vector */ reg->mat = mat_create(reg->cnt, reg->cnt); if (!reg->mat) { /* create the regression matrix */ free(reg->vec); free(reg->tab); free(reg); return NULL; } return reg; /* return the created reg. object */} /* reg_create() *//*--------------------------------------------------------------------*/void reg_delete (REGRESS *reg){ /* --- delete a regression object */ assert(reg); /* check the function argument */ mat_delete(reg->mat); /* delete the regression matrix, */ free(reg->vec); /* the internal buffers, */ free(reg->tab); /* the table of sizes, */ free(reg); /* and the base structure */} /* reg_delete() *//*--------------------------------------------------------------------*/#ifdef REG_EXTFNREGRESS* reg_createx (ATTMAP *attmap, int deg){ /* --- create a regression object */ REGRESS *reg; /* created regression object */ assert(attmap /* check the function arguments */ && (am_outcnt(attmap) <= 1)); reg = reg_create(am_incnt(attmap) +am_outcnt(attmap), deg); if (!reg) return NULL; /* create a regression object */ reg->attset = am_attset(attmap); reg->attmap = attmap; /* store attribute set and map */ reg->trgatt = am_att(attmap, -1); return reg; /* return created regression object */} /* reg_createx() *//*--------------------------------------------------------------------*/void reg_deletex (REGRESS *reg, int delas){ /* --- delete a regression object */ if (delas) { /* delete the attribute set and map */ am_delete(reg->attmap); as_delete(reg->attset); } reg_delete(reg); /* delete the regression object */} /* reg_deletex() */#endif /* #ifdef REG_EXTFN *//*--------------------------------------------------------------------*/void reg_init (REGRESS *reg){ /* --- (re)initialize regression */ mat_init(reg->mat, MAT_ZERO|MAT_VECTOR|MAT_WEIGHT, NULL); reg->sse = -1; /* clear matrix and sum of errors */} /* reg_init() *//*--------------------------------------------------------------------*/int reg_aggr (REGRESS *reg, double *vec, double wgt){ /* --- aggregate a data vector */ int i, k; /* loop variables */ double *b; /* to traverse the buffer */ double t, o = 0; /* temporary buffers */ assert(reg && vec && (wgt >= 0)); /* check the function arguments */ if (reg->max > 0) { /* if logit transformation, */ b = vec +reg->dim -1; o = *b; /* get the output value */ if ((o <= 0) || (o >= reg->max)) return -1; *b = log((reg->max -o) / o); } /* transform the output value */ if (reg->deg <= 1) /* --- if linear regression */ mat_addmpx(reg->mat, vec, wgt); /* aggregate the data vector */ else if (reg->dim <= 2) { /* --- if single variable regression */ b = reg->buf +(i = reg->deg); /* compute the needed powers */ *b = vec[1]; *--b = t = vec[0]; /* of the regressor variable */ while (--i > 0) { *--b = t *= vec[0]; } mat_addmpx(reg->mat, b, wgt); } /* aggregate vector of powers */ else if (reg->deg <= 2) { /* --- if quadratic regression */ b = reg->buf +reg->cnt; /* traverse the buffer */ for (i = reg->dim; --i >= 0; ) *--b = vec[i]; /* copy linear terms to the end */ for (i = reg->dim-1; --i >= 0; ) for (k = i+1; --k >= 0; ) /* compute squared values */ *--b = vec[i]*vec[k]; /* and mixed terms */ mat_addmpx(reg->mat, b, wgt); } /* aggregate vector of products */ else { /* --- if general regression */ _prods(reg, vec); /* compute products of the variables */ reg->buf[reg->cnt-1] = vec[reg->dim-1]; /* and store the output */ mat_addmpx(reg->mat, reg->buf, wgt); } /* aggregate the vector of products */ if (reg->max > 0) /* if logit transformation, */ vec[reg->dim-1] = o; /* restore the output value */ return 0; /* return 'ok' */} /* reg_aggr() *//*--------------------------------------------------------------------*/#ifdef REG_EXTFNint reg_aggrx (REGRESS *reg, TUPLE *tpl){ /* --- aggregate a data tuple */ assert(reg); /* check the function arguments */ am_exec(reg->attmap, tpl, AM_INPUTS|AM_TARGET, reg->vec); return reg_aggr(reg,reg->vec, /* aggregate the data vector */ (tpl) ? tpl_getwgt(tpl) : as_getwgt(reg->attset));} /* reg_aggrx() */#endif/*--------------------------------------------------------------------*/int reg_solve (REGRESS *reg){ /* --- solve regression eq. system */ assert(reg); /* check the function argument */ reg->so2 = mat_regress(reg->mat, reg->rhs, reg->mat); if (mat_chdecom(reg->mat, reg->mat) != 0) return -1; mat_chsubst(reg->cfs, reg->mat, reg->rhs); return 0; /* set up the regression eq. system */} /* reg_solve() */ /* and solve it with Cholesky decomp. *//*--------------------------------------------------------------------*/double reg_sse (REGRESS *reg){ /* --- compute sum of squared errors */ int i, k; /* loop variables */ double sse, t = 0; /* sum of squared errors, buffer */ double *r, *c; /* to access rhs and coefficients */ double *p; /* to access the matrix rows */ assert(reg); /* check the function argument */ if (reg->sse >= 0) /* if sum of squared errors is known, */ return reg->sse; /* simply return it */ r = reg->rhs; c = reg->cfs; /* compute the sum of squared errors */ i = reg->cnt -1; /* from the aggregated vectors */ sse = reg->so2 +c[i] *c[i] *mat_getwgt(reg->mat) -c[i] *r[i] *2; while (--i >= 0) { /* traverse the matrix rows */ p = mat_row(reg->mat, i); /* except the last one */ for (t = 0, k = i+1; --k >= 0; ) t += p[k] *p[k]; /* recompute the matrix diagonal */ sse += c[i] *c[i] *t; /* from the Cholesky decomposition */ for (t = -r[i], k = reg->cnt; --k > i; ) t += c[k] *p[k]; /* compute c^T *M *c -2 *c^T *r +y^2, */ sse += c[i] *t *2; /* where M is the original matrix and */ } /* r the right hand side of the LES */ return (sse < 0) ? 0 : sse; /* return the sum of squared errors */} /* reg_sse() *//*--------------------------------------------------------------------*/double reg_exec (REGRESS *reg, double *vec){ /* --- execute a regression function */ int i, k; /* loop variables */ double *b, *c; /* to traverse the buffer */ double r, t; /* value of regression func., buffer */ assert(reg && vec); /* check the function arguments */ if (reg->deg <= 1) { /* --- if linear regression */ c = reg->cfs +(i = reg->dim-1); r = *c; /* compute linear function */ while (--i >= 0) r += *--c *vec[i]; } else if (reg->dim <= 2) { /* --- if single variable regression */ c = reg->cfs +(i = reg->deg); r = *c; /* multiply the different powers */ r += *--c *(t = vec[0]); /* with the computed coefficients */ while (--i > 0) r += *--c *(t *= vec[0]); } else if (reg->deg <= 2) { /* --- if quadratic regression */ c = reg->cfs +reg->cnt -1; /* traverse the coefficients */ r = *c; /* start with the constant term */ for (i = reg->dim-1; --i >= 0; ) r += *--c *vec[i]; /* add the linear terms */ for (i = reg->dim-1; --i >= 0; ) { for (k = i+1; --k >= 0; ) /* add the squared values */ r += *--c *vec[i]*vec[k]; /* and the mixed terms */ } } else { /* --- if general regression */ _prods(reg, vec); /* compute products of variables */ b = reg->buf +reg->cnt -1; /* traverse the buffer */ c = reg->cfs +reg->cnt -1; /* and the coefficients */ r = *c; /* start with the constant term */ for (i = reg->cnt-1; --i >= 0; ) r += *--c * *--b; /* multiply the different products */ } /* with the corresponding coeffs. */ if (reg->max > 0) /* if logistic regression, */ r = reg->max /(1 +exp(r)); /* apply inverse transformation */ return r; /* return the value of the */} /* reg_exec() */ /* regression function *//*--------------------------------------------------------------------*/#ifdef REG_EXTFNdouble reg_execx (REGRESS *reg, TUPLE *tpl){ /* --- execute a regression function */ assert(reg); /* check the function arguments */ am_exec(reg->attmap, tpl, AM_INPUTS, reg->vec); return reg_exec(reg, reg->vec);} /* reg_execx() */ /* execute the regression function */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -