📄 gramod2.c
字号:
/*---------------------------------------------------------------------- File : gramod2.c Contents: graphical model management (based on directed graphs) network induction functions Author : Christian Borgelt History : 03.11.1995 file created as ines.c 06.11.1995 modified K2 programmed 05.03.1996 lower bound for improvement added 06.03.1996 adapted to modified set of evaluation measures 26.03.1996 use of minimal improvement corrected 18.06.1996 bug in use of minimal improvement fixed 07.03.1997 optimum weight spanning tree method added 23.06.1998 adapted to modified attset functions 27.04.1999 redesign of network induction completed 18.12.1999 bug in _owst fixed (loop variable) 14.12.2001 optimum spanning tree extension added 03.03.2002 network induction functions moved out of ines.c 30.03.2002 function gm_evalx added (from sian.c) 04.07.2002 local structure learning (re)added----------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <math.h>#include <assert.h>#include "vecops.h"#ifdef GM_INDUCE#include "hyper.h"#endif#ifndef GM_EVAL#define GM_EVAL#endif#include "gramod.h"#ifdef STORAGE#include "storage.h"#endif/*---------------------------------------------------------------------- Type Definitions----------------------------------------------------------------------*/typedef struct { /* --- edge --- */ int x, y; /* connected nodes, coordinates */ int mark; /* marker (> 0: edge is selectable) */ double eval; /* evaluation result */} EDGE; /* (edge) */typedef struct { /* --- connected component --- */ int id; /* identifier of representative */ int cnt; /* number of represented nodes */} COMP; /* (connected component) */typedef struct { /* --- condition selection --- */ GRAMOD *gm; /* graphical model */ ATTSET *attset; /* underlying attribute set */ TABLE *table; /* table to induce model from */ int search; /* search method */ int measure; /* evaluation measure */ double params[2]; /* evaluation measure parameters */ double minimp; /* minimum improvement */ double mrgimp; /* minimum imp. for leaf merging */ int lwise; /* flag for levelwise leaf merging */ int maxcon; /* maximum number of conditions */ int attcnt; /* number of attributes */ int ptcnt; /* maximum number of trees */ PTREE **ptrees; /* vector of probability trees */ EDGE *edges; /* vector of edges */ EDGE **etab; /* edge (evaluation) table */ EDGE **seles; /* selected edges (for _owst()) */ COMP *comps; /* connected components vector */ int step; /* counter for progress */ int verbose; /* flag for progress reporting */} CSEL; /* (condition selection) */typedef struct { /* --- simulated annealing --- */ TABLE *table; /* table to induce model from */ TABLE *test; /* table to test model on */ HTREE *best; /* best hypertree found so far */ HTREE *curr; /* current hypertree */ HTREE *tmp; /* test hypertree */} SIAN; /* (simulated annealing) *//*----------------------------------------------------------------------The edge table is an n x n matrix (n: number of attributes) in whicheach entry corresponds to the edge defined by the column index x andthe row index y. An edge is directed y->x. To avoid confusion, theconnected nodes are also called x and y in the edge structure.----------------------------------------------------------------------*//*---------------------------------------------------------------------- Constants----------------------------------------------------------------------*/static const char *snames[] = { /* search method names */ /* GM_NONE 0 */ "no search, only parameter estimation", /* GM_OWST 1 */ "optimum weight spanning tree construction", /* GM_EXTST 2 */ "optimum weight spanning tree extension", /* GM_TOPORD 3 */ "greedy condition selection on topological order", /* GM_NOLOOP 4 */ "greedy condition selection avoiding loops", /* GM_UNRES 5 */ "greedy condition selection without restrictions", /* GM_SIAN 6 */ "hypertree simulated annealing", /* GM_UNKNOWN 7 */ "unknown search method"};/*---------------------------------------------------------------------- Auxiliary Functions----------------------------------------------------------------------*/#ifdef GM_INDUCEstatic void _csel_clean (CSEL *csel){ /* --- clean up induction variables */ int i; /* loop variable */ PTREE **t; /* to traverse the prob./poss. trees */ assert(csel); /* check the function argument */ if (csel->ptrees) { /* if there is a tree vector */ for (t = csel->ptrees +(i = csel->ptcnt); --i >= 0; ) if (*--t) pt_delete(*t, 0); free(t); /* delete the prob./poss. trees */ } /* and the tree vector */ if (csel->comps) free(csel->comps); if (csel->etab) free(csel->etab); if (csel->edges) free(csel->edges);} /* _csel_clean() */ /* delete all other vectors *//*--------------------------------------------------------------------*/static int _eval (CSEL *csel, int cnt, int dep){ /* --- evaluate selected edges */ int i, k, n; /* loop variables, number of trees */ EDGE **e; /* to access the selected edges */ PTREE *pt; /* to traverse the prob./poss. trees */ ATT *con; /* condition to add */ float wgt; /* instantiation weight */ double eval; /* edge evaluation */ assert(csel && (cnt >= 0)); /* check the function argument */ if (csel->verbose) /* print the step counter */ fprintf(stderr, "%8d\b\b\b\b\b\b\b\b", ++csel->step); e = csel->seles; /* get the selected edges */ while (cnt > 0) { /* while there are edges left */ /* -- create probability/possibility trees -- */ n = (cnt > csel->ptcnt) ? csel->ptcnt : cnt; for (k = n; --k >= 0; ) { /* traverse the selected edges */ pt = pt_create(as_att(csel->attset, e[k]->x), csel->gm->type); if (!pt) return -1; /* create a prob./poss. tree */ csel->ptrees[k] = pt; /* and note it for evaluation */ if (csel->gm->tplcnt > 0) /* set the normalization factor */ pt_setnorm(pt, csel->gm->tplcnt); if (pt_concopy(pt, gm_ptree(csel->gm, e[k]->x)) != 0) return -1; /* copy all existing conditions */ if (e[k]->y == e[k]->x) continue; con = as_att(csel->attset, e[k]->y); if (pt_conadd(pt, pt_concnt(pt), con) != 0) return -1; /* add the conditioning attribute */ } /* (if there is such an attribute) */ /* -- estimate parameters -- */ for (i = tab_tplcnt(csel->table); --i >= 0; ) { tpl_toas(tab_tpl(csel->table, i)); /* traverse the table */ wgt = as_getwgt(csel->attset); for (k = n; --k >= 0; ) /* traverse the condition trees */ if (pt_aggr(csel->ptrees[k], wgt, 0) < 0) return -1; } /* aggregate the tuple for all trees */ /* -- evaluate probability/possibility trees -- */ for (k = n; --k >= 0; ) { /* traverse the trees and */ pt = csel->ptrees[k]; /* evaluate each of them */ eval = (dep) ? pt_depend(pt, csel->measure, csel->params) : pt_eval (pt, csel->measure, csel->params); if (eval <= PT_ERROR) return -1; if (!dep && (csel->lwise >= 0)) { eval = pt_local(pt, csel->lwise, csel->mrgimp); if (eval <= PT_ERROR) return -1; } /* do local structure learning */ e[k]->eval = eval; /* note the evaluation result */ pt_delete(pt, 0); csel->ptrees[k] = NULL; } /* delete the prob./poss. tree */ cnt -= n; e += n; /* skip the processed edges */ } return 0; /* return 'ok' */} /* _eval() *//*---------------------------------------------------------------------- Optimum Weight Spanning Tree Construction----------------------------------------------------------------------*/static int _ecmp (const void *p1, const void *p2, void *data){ /* --- compare two edges */ if ((((EDGE*)p1)->mark & 3) > (((EDGE*)p2)->mark & 3)) return -1; if ((((EDGE*)p1)->mark & 3) < (((EDGE*)p2)->mark & 3)) return 1; if ( ((EDGE*)p1)->eval > ((EDGE*)p2)->eval) return -1; if ( ((EDGE*)p1)->eval < ((EDGE*)p2)->eval) return 1; return 0; /* return sign of eval. difference */} /* _ecmp() *//*--------------------------------------------------------------------*/static int _tree (CSEL *csel, int n){ /* --- build spanning tree */ int i; /* loop variables */ int x, y; /* node identifiers */ EDGE **p, **q; /* to traverse the edges */ COMP *c; /* to traverse the connected comps. */ assert(csel && (n >= 0)); /* check the function arguments */ if (!csel->comps) { /* if there is no con. comp. vector */ csel->comps = (COMP*)malloc(csel->attcnt *sizeof(COMP)); if (!csel->comps) return -1;/* create a connected comp. vector */ } /* (to be able to check for loops) */ for (c = csel->comps +(i = csel->attcnt); --i >= 0; ) { (--c)->id = i; c->cnt = 1;} /* init. the connected components */ v_sort(csel->seles, n, _ecmp, NULL); /* and sort the edges */ for (p = q = csel->seles; --n >= 0; q++) { if ((*q)->eval < csel->minimp) break; /* check eval. against minimum imp. */ x = (*q)->x; /* find first node's component id. */ while (x != c[x].id) x = c[x].id; y = (*q)->y; /* find second node's component id. */ while (y != c[y].id) y = c[y].id; if (x == y) /* if the components are the same, */ continue; /* skip the edge (avoid loops) */ if (c[x].cnt < c[y].cnt) { c[y].cnt += c[x].cnt; c[x].id = y; } else { c[x].cnt += c[y].cnt; c[y].id = x; } *p++ = *q; /* combine the connected components */ } /* and collect the selected edges */ return (int)(p -csel->seles); /* return the number of tree edges */} /* _tree() *//*--------------------------------------------------------------------*/static int _owst (CSEL *csel){ /* --- optimum weight spanning tree */ int i, k, n; /* loop variables, number of edges */ int x, y; /* node identifiers */ EDGE *e, **p, **q; /* to traverse the edges */ COMP *c; /* to traverse the connected comps. */ assert(csel); /* check the function argument */ /* --- collect possible edges --- */ for (i = csel->attcnt; --i >= 0; ) { for (k = gm_cddcnt(csel->gm, i); --k >= 0; ) { y = gm_cddid(csel->gm, i, k); csel->etab[i][y].mark = csel->etab[y][i].mark = 1; } /* traverse the candidate attributes */ } /* and mark the selectable edges */ for (i = csel->attcnt; --i >= 0; ) { for (k = gm_concnt(csel->gm, i); --k >= 0; ) { y = gm_conid(csel->gm, i, k); csel->etab[i][y].mark = csel->etab[y][i].mark = 2; } /* traverse the existing conditions */ } /* and mark the mandatory edges */ p = csel->seles; /* traverse the edge table */ for (i = csel->attcnt; --i > 0; ) { for (e = csel->etab[i] +(k = i); --k >= 0; ) if ((--e)->mark > 0) *p++ = e; } /* collect the selectable edges */ n = (int)(p -csel->seles); /* and compute their number */ /* --- build tree and direct edges --- */ gm_conrem(csel->gm, -1, 0); /* remove all conditions */ if (_eval(csel, n, 0) != 0) return -1; n = _tree(csel, n); /* evaluate the selectable edges */ if (n < 0) return -1; /* and build a spanning tree */ for (c = csel->comps +(i = csel->attcnt); --i >= 0; ) (--c)->cnt = 0; /* initialize the edge counters */ for (p = csel->seles +(i = n); --i >= 0; ) { c[(*--p)->x].cnt++; /* traverse the edges and */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -