📄 ci_instances.c
字号:
/*********************************************************************//* LINEAR TREE for Supervised Learning *//* Versao 1.0 (10/12/1997) *//* Developed by: Joao Gama *//* LIACC - Uni.do Porto *//* jgama@ncc.up.pt *//*-------------------------------------------------------------------*//* File: Ltree.c *//*********************************************************************/#include <stdio.h>#include <stdlib.h>#include <string.h>#include <values.h>#include "Ci_instances.h"#include "utils.h"static char UNKNOWN = '?';static char DONTCARE = '*';#define MAX_STR_SIZE 5024#define digit(ch) ((ch) >= '0' && (ch) <= '9')#define letter(ch) ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z'))/************************************************//* Private Methods *//************************************************/static CiExample **_ReadCiInstances(FILE *fi, DomainInfo *domain, unsigned long *nrexs, unsigned long nr);static AttrVal *ReadAttVal(FILE *fi, DomainInfo *domain, unsigned long nrex, long line_nr);static AttrVal *ReadCiInstance(FILE *fi, DomainInfo *domain, long nrex, int *classe);static void CiExchange(CiExample **examples, unsigned long ex1, unsigned long ex2);static DomainInfo *GenerateDomainInfo(char ***lines, int *nr_words, int nr_lines, int line, int natt);static int Processa_atributo(char **lines, int nr_words, int natt, char **name, int *type, void **vals, int *nr_vals);/************************************************//* Public Methods *//************************************************/CiDs *ReadCiDataset(FILE *fi, DomainInfo *domain){ CiDs *ds = (CiDs *) malloc(sizeof(CiDs)); if (ds) { ds->examples = _ReadCiInstances(fi, domain, &ds->nr_exs, 1); ds->domain = domain; } else fprintf(stderr, "ReadCiDataset: Out of memory\n"); return ds;}void Show_CiInstances(CiDs *ds, long int Low, long int High){ register unsigned long i; if (High > ds->nr_exs) High = ds->nr_exs; printf("\nId\tNr.Att\tWeight\tCl\n"); for (i = Low; i <= High; i++) { printf("%.0f\t%d\t%.3f\t%d\t", Id(Ci_AttVal(ds, i)), Ci_NrAtts(Ci_Example(ds, i)),Ci_Weight(Ci_Example(ds, i)), Ci_Classe(Ci_Example(ds,i))); ShowCiInstance(ds->domain, Ci_AttVal(ds, i), Ci_NrAtts(Ci_Example(ds,i))); }}int Ci_ReBuildInstance(CiExample *exemplo, int nr_att){ register int i; AttrVal *instance; if ((instance = (AttrVal *) realloc(exemplo->instance, nr_att * sizeof(AttrVal))) != NULL) { for(i = 1+exemplo->nr_att; i < nr_att; i++) { TypeOfVal(instance[i]) = normal; instance[i].val.c = 0.0; } exemplo->instance = instance; exemplo->nr_att = nr_att - 1; return TRUE; } return FALSE;}/************************************************//* Private Methods *//************************************************/static CiExample **_ReadCiInstances\(FILE *fi, DomainInfo *domain, unsigned long *nrexs, unsigned long nr){ int classe; AttrVal *instance; CiExample *example, **examples; if (feof(fi)) { if ((examples = (CiExample **) calloc(nr - 1, sizeof(CiExample *))) == NULL) { fprintf(stderr, "CiReadInstances: Not enough memory\n"); exit(1); } *nrexs = nr - 1; return --examples; } if ((instance = ReadCiInstance(fi, domain, nr, &classe)) != NULL){ examples = _ReadCiInstances(fi, domain, nrexs, nr+1); if ((example = (CiExample *) malloc(sizeof(CiExample))) != NULL) { Ci_Classe(example) = classe; Ci_NrAtts(example) = NrAttrs(domain); Ci_Weight(example) = 1.0; example->instance = instance; examples[nr] = example; } else fprintf(stderr, "CiReadInstances: Not enough memory\n"); return examples; } else return _ReadCiInstances(fi, domain, nrexs, nr);}/***********************************************************//* Private Methods: Instances *//***********************************************************/static AttrVal *ReadCiInstance\(FILE *fi, DomainInfo *domain, long nrex, int *classe){ char *value; AttrVal *instance; static long line_nr = 0; *classe = -1; if ((instance = ReadAttVal(fi, domain, nrex, line_nr)) != NULL) { if ((value = ReadField(fi, ",\t ")) != NULL) *classe = IdValLbl(domain, NrAttrs(domain)+1, value); else while ((value = ReadField(fi, ",\t ")) != NULL); if (*classe == -1) fprintf(stderr, "ReadAttVal: Instance %ld (Line %ld) Invalid Classe value: %s\n", nrex, line_nr, value); } ++line_nr; return (*classe == -1) ? NULL : instance;}static AttrVal *ReadAttVal\(FILE *fi, DomainInfo *domain, unsigned long nrex, long line_nr){ register int i; int pos = 0; char *value; AttrVal *attr_val = (AttrVal *) calloc(1+NrAttrs(domain), sizeof(AttrVal)); if (attr_val == NULL) { fprintf(stderr, "ReadAttVal: Out of Memory\n"); return NULL; } Id(attr_val) = (ContType) nrex; for(i = 1; i <= NrAttrs(domain) && pos != -1; i++) { if ((value = ReadField(fi, ",\t ")) != NULL) { if (*value == UNKNOWN) { TypeOfVal(attr_val[i]) = unknown; attr_val[i].val.c = MINFLOAT; } else if (*value == DONTCARE) { TypeOfVal(attr_val[i]) = dontcare; attr_val[i].val.c = MINFLOAT; } else { switch(CiTypeAttr(domain, i)) { case continuous: attr_val[i].val.c = atof(value); break; case integer: attr_val[i].val.d = atoi(value); if ((pos = IdValLbl(domain, i, value)) == 0) fprintf(stderr, "ReadAttVal: Instance %ld (Line %ld) Invalid attribute ( %d )value: %s\n", nrex, line_nr, i, value); break; case ordered: case nominal: pos = IdValLbl(domain, i, value); if (pos) attr_val[i].val.d = pos; else { TypeOfVal(attr_val[i]) = unknown; fprintf(stderr, "ReadAttVal: Instance %ld (Line %ld) Invalid attribute ( %d )value: %s\n", nrex, line_nr, i, value); } break; } } } else break; } if (i > NrAttrs(domain)) return attr_val; if (i > 1) fprintf(stderr, "ReadAttVal: Instance %ld (Line %ld) Invalid number of attributes\n", nrex, line_nr); free(attr_val); return NULL;}void ShowCiInstance(DomainInfo *domain, AttrVal *instance, int nr_att){ register int j; if (instance != NULL) { for(j = 1; j <= nr_att; j++) { switch(TypeOfVal(instance[j])) { case normal: switch(CiTypeAttr(domain, j)) { case continuous: printf("%6.3f\t", instance[j].val.c); break; case integer: printf("%d\t", instance[j].val.d); break; case ordered: case nominal: printf("%s\t", LblValId(domain, j, instance[j].val.d)); break; } break; case unknown: printf("?\t"); break; case dontcare: printf("?\t"); break; } } printf("\n"); }}/**************************************//* Public Methods for SORT *//**************************************//*************************************************** Goal: Sort instances between [Low .. High] by the values of attribute Att Input: Domain Info Array of arrays of instances Attribute Limits of instances Output: TRUE or FALSE****************************************************/int CiQuickSort(CiDs *ds, int Att, unsigned long Low, unsigned long High){ register unsigned long i, Lower, Middle; double Thresh, value; enum AttrTypes tipo; tipo = CiTypeAttr(ds->domain, Att); if ( Low < High ) { switch(tipo) { case continuous: Thresh = CValAttEx(Ci_AttVal(ds, Low), Att); break; case nominal: case ordered: case integer: Thresh = DValAttEx(Ci_AttVal(ds, Low), Att); break; } Middle = Low; for ( i = Low ; i <= High ; i++ ) { value = tipo == continuous ? CValAttEx(Ci_AttVal(ds, i),Att) : (double) DValAttEx(Ci_AttVal(ds, i),Att); if (value <= Thresh ) { if (i != Middle ) CiExchange(Ci_Examples(ds),Middle, i); Middle++; } } if (Middle != Low) { Lower = Middle - 1; for ( i = Lower ; i >= Low ; i-- ) { value = tipo == continuous ? CValAttEx(Ci_AttVal(ds, i),Att) : (double) DValAttEx(Ci_AttVal(ds, i),Att); if (value == Thresh ) { if ( i != Lower ) CiExchange(Ci_Examples(ds),Lower, i); Lower--; } } CiQuickSort(ds, Att, Low, Lower); CiQuickSort(ds, Att, Middle, High); } } return TRUE;}/*************************************************** Goal: Join all examples with the same value (nominal attributes) Input: Domain Info Array of arrays of instances Attribute Spliting value Limits of examples Output: Spliting point****************************************************/
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -