
📄 buildtree.c

📁 source codes for ltr
💻 C
/*********************************************************************/
/*        LINEAR TREE for Supervised Learning                        */
/*        Versao 1.0 (10/12/1997)                                    */
/*        Developed by: Joao Gama                                    */
/*                LIACC - Uni.do Porto                               */
/*                jgama@ncc.up.pt                                    */
/*-------------------------------------------------------------------*/
/*  File: BuildTree.c                                                */
/*********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <unistd.h>
#include <errno.h>
#include "Ci_instances.h"
#include "distributions.h"
#include "entropia.h"
#include "tree.h"
#include "BuildTree.h"
#include "Combine.h"
#include "discrim.h"
#include "utils.h"
#include "externs.i"

static int c = 0, vc[100];

#define NO_MEMORY(a) \
  { fprintf (stderr, "\nERROR: %s Unable to allocate memory\n\n", a); exit( 1 ); }

static Tree *build_tree(CiDs *ds, long int Low, long int High, int nr_att,
                        TreeInfo *treeinfo, Tree *ascendent, int depth,
                        double *errors, int *max_depth);
static Data *Make_Data(double *class_distribution, double *acum_class_distr,
                       int nr_att, double errors, int split_att, double split_value);
/* static int Remove_Coefficients(DomainInfo *domain, Tree *tree, int nr_att); */

Tree *BuildTree(CiDs *ds, long int Low, long int High, int nr_att, TreeInfo *treeinfo)
{
  int depth = 0;
  double sub_errors = 0.0;
  Tree *tree;

  tree = build_tree(ds, Low, High, nr_att, treeinfo, NULL, 0, &sub_errors, &depth);
  DEPTH(treeinfo) = depth;
  return tree;
}

static Tree *build_tree(CiDs *ds, long int Low, long int High, int nr_att,
                        TreeInfo *treeinfo, Tree *ascendent, int depth,
                        double *errors, int *max_depth)
{
  register int i, k, majority_class;
  int act_depth = 0, hidden = 0, actual_nr_att, actual_node, actual_leaves;
  long int posl, posU, posh, Midle;
  double *ascendent_class_dist = NULL, *acum_class_dist, *dist_class;
  double node_errors = 0.0, sub_errors = 0.0, mean;
  double ***stats, **coef = NULL;
  Data *data;
  EntAtt *bestent;
  Tree *arvore = NULL;

  ++NODES(treeinfo);
  ++depth;
  actual_node = NODES(treeinfo);
  actual_leaves = LEAVES(treeinfo);
  actual_nr_att = nr_att;

  if ((stats = My_Distribuitions(ds, Low, High, nr_att, &dist_class)) == NULL)
    NO_MEMORY("Generating statistics:")

  if (ascendent != NULL) {
    data = (Data *) DATA(ascendent);
    ascendent_class_dist = (double *) CLASS_DIST(data);
  }
  acum_class_dist = combing_evidences(DOMAIN_INFO(ds), dist_class, ascendent_class_dist);
  majority_class = dmajority(acum_class_dist, Ci_NrClasses(ds->domain));
  node_errors = dist_class[0] - dist_class[majority_class];

  if ((data = Make_Data(dist_class, acum_class_dist, nr_att, node_errors, 0, 0.0)) == NULL)
    NO_MEMORY("Generating Data:")

  /***********************************/
  /*   Pre Prunning                  */
  /***********************************/
  if (!dist_class[0]) {
    free_stats(ds->domain, stats, Ci_NrClasses(ds->domain), nr_att);
    *max_depth = 1;
    ++LEAVES(treeinfo);
    return Make_Tree(data, NODES(treeinfo), 0, ascendent, leaf);
  }
  if (dist_class[0] < 2 * MINIMUM_EXAMPLES) {
    free_stats(ds->domain, stats, Ci_NrClasses(ds->domain), nr_att);
    ERRORS(treeinfo) += node_errors;
    *errors += node_errors;
    ++LEAVES(treeinfo);
    *max_depth = 1;
    return Make_Tree(data, NODES(treeinfo), 0, ascendent, leaf);
  }
  if (dist_class[majority_class] + MINIMUM_EXAMPLES > dist_class[0]) {
    free_stats(ds->domain, stats, Ci_NrClasses(ds->domain), nr_att);
    ERRORS(treeinfo) += node_errors;
    *errors += node_errors;
    ++LEAVES(treeinfo);
    *max_depth = 1;
    return Make_Tree(data, NODES(treeinfo), 0, ascendent, leaf);
  }
  if (dist_class[majority_class] / dist_class[0] >= MIN_SPLIT) {
    free_stats(ds->domain, stats, Ci_NrClasses(ds->domain), nr_att);
    ERRORS(treeinfo) += node_errors;
    *errors += node_errors;
    ++LEAVES(treeinfo);
    *max_depth = 1;
    return Make_Tree(data, NODES(treeinfo), 0, ascendent, leaf);
  }

  /***********************************/
  /*   Build New Attributes          */
  /***********************************/
  if (MAX_DEPTH > depth) {
    if ((coef = discriminant(ds, Low, High, stats, dist_class, nr_att,
                             Ci_NrClasses(ds->domain), &hidden)) != NULL) {
      if (hidden) {
        apply_discriminant(ds, coef, Low, High, nr_att);
        nr_att += hidden;
      }
    }
  }

  /***********************************/
  /*   Select Attribute              */
  /***********************************/
  bestent = entropia(ds, stats, dist_class, nr_att, Low, High, &mean);
  if (ENT_ATT(bestent) == -1) {
    ++COLAPSES(treeinfo);
    ERRORS(treeinfo) += node_errors;
    *errors += node_errors;
    ++LEAVES(treeinfo);
    free(bestent);
    free_stats(ds->domain, stats, Ci_NrClasses(ds->domain), actual_nr_att);
    *max_depth = 1;
    return Make_Tree(data, NODES(treeinfo), 0, ascendent, leaf);
  }
  SPLIT_ATT(data) = ENT_ATT(bestent);
  SPLIT_VALUE(data) = ENT_SPLIT(bestent);

  switch (CiTypeAttr(ds->domain, ENT_ATT(bestent))) {
  case integer:
  case ordered:
  case nominal:
    posl = Low;
    arvore = Make_Tree(data, NODES(treeinfo), NValsAttr(ds->domain, ENT_ATT(bestent)),
                       ascendent, split_discrete);
    posU = CiJoinUnknowns(ds, ENT_ATT(bestent), Low, High);
    for (i = Low; i < posU; i++)
      Ci_Weight(Ci_Example(ds, i)) /= NValsAttr(ds->domain, ENT_ATT(bestent));
    for (i = 1; i <= NValsAttr(ds->domain, ENT_ATT(bestent)); i++) {
      posh = CiJoinValues(ds, ENT_ATT(bestent), i, posU, High);
      DESCENDENT(arvore, i) = build_tree(ds, Low, posh-1, nr_att, treeinfo, arvore,
                                         depth, &sub_errors, &act_depth);
      if (*max_depth < act_depth) *max_depth = act_depth;
      act_depth = 0;
      posU = CiJoinUnknowns(ds, ENT_ATT(bestent), Low, posh-1);
      for (k = Low; k < posU; k++)
        Ci_ReBuildInstance(Ci_Example(ds, k), nr_att);
    }
    break;
  case continuous:
    arvore = Make_Tree(data, NODES(treeinfo), 2, ascendent, split_continuous);
    CiQuickSort(ds, ENT_ATT(bestent), Low, High);
    posU = Low;
    while (!NormalVal(Ci_AttVal(ds, posU)[ENT_ATT(bestent)]))
      Ci_Weight(Ci_Example(ds, posU++)) /= 2.0;
    Midle = CiSplitingPosition(ds, ENT_ATT(bestent), Low, High, ENT_SPLIT(bestent));
    DESCENDENT(arvore, 1) = build_tree(ds, Low, Midle, nr_att, treeinfo, arvore,
                                       depth, &sub_errors, &act_depth);
    if (*max_depth < act_depth) *max_depth = act_depth;
    act_depth = 0;
    posU = CiJoinUnknowns(ds, ENT_ATT(bestent), Low, Midle);
    for (k = Low; k < posU; k++)
      Ci_ReBuildInstance(Ci_Example(ds, k), nr_att);
    Midle = CiMoveUnknowns(ds, ENT_ATT(bestent), Low, posU, Midle);
    DESCENDENT(arvore, 2) = build_tree(ds, Midle+1, High, nr_att, treeinfo, arvore,
                                       depth, &sub_errors, &act_depth);
    if (*max_depth < act_depth) *max_depth = act_depth;
    break;
  }

  /*************************************/
  /* Pos_prunning: Error reduction     */
  /*************************************/
  if (node_errors <= sub_errors) {
    ERRORS(treeinfo) += node_errors;
    ERRORS(treeinfo) -= sub_errors;
    NODES(treeinfo) = actual_node;
    LEAVES(treeinfo) = 1 + actual_leaves;
    *errors = node_errors;
    DESCENDENTS(arvore) = NULL;
    NR_DESCENDENTS(arvore) = 0;
    arvore->tipo = leaf;
    *max_depth = 0;
    ++COLAPSES(treeinfo);
  }
  else {
    COEFICIENTS(data) = coef;
    COLUMNS(data) = actual_nr_att + 1 - Nr_Att_Non_Cont();
    ROWS(data) = hidden;
  }
  free(bestent);
  free_stats(ds->domain, stats, Ci_NrClasses(ds->domain), actual_nr_att);
  ++(*max_depth);
  return arvore;
}

static Data *Make_Data(double *class_distribution, double *acum_class_distr,
                       int nr_att, double errors, int split_att, double split_value)
{
  Data *data = NULL;

  if ((data = (Data *) malloc(sizeof(Data))) != NULL) {
    CLASS_DIST(data)      = class_distribution;
    ACUM_CLASS_DIST(data) = acum_class_distr;
    NR_ATT(data)          = nr_att;
    COEFICIENTS(data)     = NULL;
    COLUMNS(data)         = 0;
    ROWS(data)            = 0;
    DERRORS(data)         = errors;
    TREE_ERRORS(data)     = 0.0;
    SPLIT_ATT(data)       = split_att;
    SPLIT_VALUE(data)     = split_value;
  }
  return data;
}

void ShowTree(DomainInfo *domain, Data *data, int nr, TYPE_NODE t)
{
  register int i, Att, majority_class;

  Att = SPLIT_ATT(data);
  switch (t) {
  case leaf:
    majority_class = dmajority(ACUM_CLASS_DIST(data), Ci_NrClasses(domain));
    printf("%s (%.2f/%.2f) [ ",
           LblValId(domain, NrAttrs(domain) + 1, majority_class),
           CLASS_DIST(data)[0], DERRORS(data));
    for (i = 1; i <= Ci_NrClasses(domain); i++)
      printf("%.3f ", ACUM_CLASS_DIST(data)[i]);
    printf("]");
    break;
  case split_discrete:
    printf("%s = %s", NameAttr(domain, Att), LblValId(domain, Att, nr));
    break;
  case split_continuous:
  case split_linear:
    switch (nr) {
    case 1:
      if (Att > NrAttrs(domain))
        printf("Linear_%d <= %.3f", Att, SPLIT_VALUE(data));
      else
        printf("%s <= %.3f", NameAttr(domain, Att), SPLIT_VALUE(data));
      break;
    case 2:
      if (Att > NrAttrs(domain))
        printf("Linear_%d > %.3f", Att, SPLIT_VALUE(data));
      else
        printf("%s > %.3f", NameAttr(domain, Att), SPLIT_VALUE(data));
      break;
    }
    break;
  }
  printf("\n");
}

void Show_Coeficients(DomainInfo *domain, Tree *tree)
{
  register int i, j, att, nr_rows, nr_columns;
  double **coef;
  Data *data;
  static int nr = 0;

  data = DATA(tree);
  if (!nr) nr = NR_ATT(data);
  switch (tree->tipo) {
  case leaf:
    break;
  case split_discrete:
  case split_continuous:
  case split_linear:
    nr_rows = ROWS(data);
    nr_columns = COLUMNS(data);
    coef = COEFICIENTS(data);
    if (nr_rows && nr_columns && coef) {
      for (i = 1; i <= nr_rows; i++) {
        ++nr;
        printf("Linear_%d\t", nr);
        printf("%+.3f", coef[i][1]);
        for (j = 2, att = 1; att <= NR_ATT(data); att++)
          if (CiTypeAttr(domain, att))
            if (coef[i][j])
              printf("%+.3f*[%d]", coef[i][j++], att);
            else
              ++j;
        printf("\n");
      }
    }
    for (i = 1; i <= NR_DESCENDENTS(tree); i++)
      Show_Coeficients(domain, DESCENDENT(tree, i));
    break;
  }
}

int WriteData(int fd, Data *data, int nr_cl)
{
  register int i;

  if (write(fd, &SPLIT_ATT(data), sizeof(int)) == sizeof(int))
    if (write(fd, &SPLIT_VALUE(data), sizeof(double)) == sizeof(double))
      if (write(fd, &DERRORS(data), sizeof(double)) == sizeof(double))
        if (write(fd, &TREE_ERRORS(data), sizeof(double)) == sizeof(double))
          if (write(fd, 1+ACUM_CLASS_DIST(data), nr_cl * sizeof(double)) == nr_cl * sizeof(double))
            if (write(fd, CLASS_DIST(data), (nr_cl + 1) * sizeof(double)) == (nr_cl+1) * sizeof(double))
              if (write(fd, &NR_ATT(data), sizeof(int)) == sizeof(int)) {
                if (write(fd, &COLUMNS(data), sizeof(int)) == sizeof(int))
                  if (write(fd, &ROWS(data), sizeof(int)) == sizeof(int))
                    if (COLUMNS(data) * ROWS(data)) {
                      for (i = 1; i <= ROWS(data); i++)
                        if (write(fd, 1+(COEFICIENTS(data)[i]), COLUMNS(data) * sizeof(double)) != COLUMNS(data) * sizeof(double))
                          return FALSE;
                      return TRUE;
                    }
              }
  return FALSE;
}

void *ReadData(int fd, int nr_cl)
{
  register int i;
  int split_att, nr_att, rows, columns;
  double *class_distribution, *acum_class_distr, errors, tree_errors, split_value;
  double **coefs = NULL;
  Data *data;

  class_distribution = dvector(0, nr_cl);
  acum_class_distr = dvector(1, nr_cl);

  if (read(fd, &split_att, sizeof(int)) == sizeof(int))
    if (read(fd, &split_value, sizeof(double)) == sizeof(double))
      if (read(fd, &errors, sizeof(double)) == sizeof(double))
        if (read(fd, &tree_errors, sizeof(double)) == sizeof(double))
          if (read(fd, 1+acum_class_distr, nr_cl * sizeof(double)) == nr_cl * sizeof(double))
            if (read(fd, class_distribution, (nr_cl + 1) * sizeof(double)) == (nr_cl + 1) * sizeof(double))
              if (read(fd, &nr_att, sizeof(int)) == sizeof(int))
                if (read(fd, &columns, sizeof(int)) == sizeof(int))
                  if (read(fd, &rows, sizeof(int)) == sizeof(int)) {
                    if (columns * rows) {
                      coefs = dmatrix(1, rows, 1, columns);
                      for (i = 1; i <= rows; i++)
                        read(fd, 1+coefs[i], columns * sizeof(double));
                    }
                    data = Make_Data(class_distribution, acum_class_distr, nr_att,
                                     errors, split_att, split_value);
                    TREE_ERRORS(data) = tree_errors;
                    COLUMNS(data) = columns;
                    ROWS(data) = rows;
                    COEFICIENTS(data) = coefs;
                    return data;
                  }
  return NULL;
}

int PruneTree(DomainInfo *domain, Data *data, double **distrib)
{
  *distrib = CLASS_DIST(data);
  return dmajority(ACUM_CLASS_DIST(data), Ci_NrClasses(domain));
}

int RemoveCoefficients(DomainInfo *domain, Tree *tree, int nr_att)
{
  register int i, nr_new = 0, new_desc = 0;
  Data *data;
  int test_att;

  data = DATA(tree);
  test_att = SPLIT_ATT(data);
  nr_new = ROWS(data) > 0 ? ROWS(data) : 0;
  if (nr_new > 0) {
    for (i = c; i >= 0; i--) {
      if (vc[i] >= nr_att + nr_new) {
        vc[i] = 0; c--;
      }
    }
  }
  if (test_att > NrAttrs(domain)) vc[c++] = test_att;
  switch (tree->tipo) {
  case leaf:
    return 0;
    break;
  case split_discrete:
    for (i = 1; i <= NR_DESCENDENTS(tree); i++)
      new_desc += RemoveCoefficients(domain, DESCENDENT(tree, i), NR_ATT(data)+nr_new);
    break;
  case split_continuous:
  case split_linear:
    new_desc = RemoveCoefficients(domain, DESCENDENT(tree, 1), NR_ATT(data)+nr_new);
    new_desc += RemoveCoefficients(domain, DESCENDENT(tree, 2), NR_ATT(data)+nr_new);
    break;
  }
  if (nr_new) {
    for (i = 0; i <= c; i++)
      if (vc[i] > nr_att) return 0;
    ROWS(data) = 0;
    COLUMNS(data) = 0;
    COEFICIENTS(data) = NULL;
  }
  return 0;
}
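For orientation, here is a minimal calling sketch. It is not part of the original distribution: load_training_set, nr_attributes and nr_examples are hypothetical placeholders standing in for the Ci_* loading API, whose headers are not shown on this page. The only call taken from this file is BuildTree itself, which wraps the recursive build_tree, fills the TreeInfo counters (nodes, leaves, collapses, errors, depth), and returns the root of the grown tree; Low and High bound the range of training examples the root is grown over.

/* Hypothetical driver for BuildTree -- a sketch only, not from the package.  */
/* load_training_set(), nr_attributes() and nr_examples() are placeholders    */
/* standing in for the real Ci_* loading API, which this file does not show.  */
#include <string.h>
#include "Ci_instances.h"
#include "tree.h"
#include "BuildTree.h"

extern CiDs *load_training_set(const char *stem);   /* placeholder */
extern int   nr_attributes(const CiDs *ds);         /* placeholder */
extern long  nr_examples(const CiDs *ds);           /* placeholder */

int main(void)
{
  TreeInfo info;
  memset(&info, 0, sizeof info);               /* node/leaf/error counters start at zero */

  CiDs *ds     = load_training_set("train");   /* placeholder loader */
  int   nr_att = nr_attributes(ds);
  long  low    = 0, high = nr_examples(ds) - 1;  /* inclusive bounds, as in build_tree's
                                                    recursive calls; the base index depends
                                                    on the Ci_* layout (assumption) */

  Tree *tree = BuildTree(ds, low, high, nr_att, &info);

  (void) tree;   /* e.g. walk it with ShowTree / Show_Coeficients */
  return 0;
}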
