📄 spardat.c

📁 ADaM is a data mining and image processing toolkit
💻 C
📖 第 1 页 / 共 2 页
字号:
上一页 12
				  bool high_means_active){  precs *ps = mk_precs_from_filename(filename);  spardat *sp = mk_spardat_from_precs(ps,act_thresh,high_means_active);  free_precs(ps);  return sp;}char *mk_filename_from_pfilename(char *pfilename,double *r_act_thresh,				 bool *r_high_means_active){  string_array *sa = mk_broken_string_using_seppers(pfilename,":");  char *problem = NULL;  char *filename = NULL;  if ( string_array_size(sa) == 0) {    problem = mk_copy_string( "The pfilename is an empty string.\n");  }  if ( string_array_size(sa) == 1 ) {    problem=mk_copy_string("There's no colon in the middle of the pfilename");  }  else if ( string_array_size(sa) > 2 ) {    problem=mk_copy_string("There's more than one colon");  }  if ( problem == NULL )  {    char *s1 = string_array_ref(sa,0);    char *s2 = string_array_ref(sa,1);    int s2len = (int)strlen(s2);    if ( s2len < 2 )      problem = mk_copy_string("Nothing useful after the colon");    else    {      char c = s2[s2len-1];      if ( c == '+' )	*r_high_means_active = TRUE;      else if ( c == '-' )	*r_high_means_active = FALSE;      else	problem = mk_copy_string("pfilename should end in + or -");            if ( problem == NULL )      {	s2[s2len-1] = '\0';	if ( !is_a_number(s2) )	  problem = mk_printf("%s is not a number",s2);	else	  *r_act_thresh = atof(s2);	s2[s2len-1] = c;      }    }    if ( problem == NULL )      filename = mk_copy_string(s1);  }	  if ( problem != NULL )  {    my_errorf("I was trying to parse \"%s\" as a pfilename, where a\n"	      "pfilename should consist of a filename, followed by a\n"	      "colon followed by a threshold follow by a + or -. For\n"	      "example, \"data.txt:50+\" would have been good. But\n"	      "with what you gave me there was the following problem:\n"	      "%s\n",pfilename,problem);  }  free_string_array(sa);  return filename;}ivec_array *mk_row_to_posatts_from_filename(char *fname){  PFILE *s = safe_pfopen(fname,"r");  bool finished = FALSE;  int line = 0;  ivec_array *row_to_posatts = mk_empty_ivec_array();  int report_size = 1;  printf("Beginning to load afc Factors from file %s...\n",fname);  while ( !finished )  {    string_array *sa = mk_next_tokens(s,&line,AUTO_FORMAT);    if ( sa == NULL )      finished = TRUE;    else    {      int size = string_array_size(sa);      if ( size < 1 )	my_errorf("On line %d of %s there are no tokens",line,fname);      else      {	ivec *f = mk_factors_from_string_array(sa,line);	add_to_ivec_array(row_to_posatts,f);	if ( ivec_array_size(row_to_posatts) >= report_size )	{	  printf("Loaded %d line%s from %s\n",ivec_array_size(row_to_posatts),		 (ivec_array_size(row_to_posatts)==1)?"":"s",fname);	  report_size = 2 * report_size;	}	free_ivec(f);      }      free_string_array(sa);    }  }  pfclose(s);  printf("...Finished loading %d afc Factors from file %s\n",	 ivec_array_size(row_to_posatts),fname);  return row_to_posatts;}/*We now have the ability to load spardats.I suggest you do the following in your h/afc directory...ln -s <revisit - where did these files go?>/jun02 jun02(Maybe also do it in your Linux_2.4.17_x86_gcc.debug directory)jun02 has the following files...activations3.ssv  factors3.ssv  names3.ssvactivations4.ssv  factors4.ssv  names4.ssvThese correspond to two separate sets of high throughput runs called TEST3and TEST4The factors for test X (where X = 3 or 4) are in factorsX.ssv . Theonly difference between this and the earlier spardat data files is thatfactorsX.ssv does not contain the output activation level for the experimentsat the start of each line. It only contains the inputs (the posatts).There are K sets of output activations for the X experiment, where  K = 4 if X = 3  K = 2 if X = 4The j'th activation class for the i'th row of factorX is specified inthe i'th row of the Actj column in activationsX.ssv.To load a spardat from this format, you must specify a wacky "jun02"filename syntax of this form:   <factorsfile>:<activationsfile>:<actcolumnname>for example:   jun02/factors3.ssv:jun02/activations3.ssv:Act1The spardat (mk_spardat_from_pfilename) has been updated to copewith this syntax as an additional option. It automatically figuresout when you are using this filename form as opposed to the otherspartdat loading options.*/spardat *mk_spardat_from_pfilename(char *pfilename,int argc,char *argv[]){  bool link;  spardat *sp;  string_array *sa;  link = index_of_arg( "link", argc, argv) > 0;  sp = NULL;  sa = mk_broken_string_using_seppers(pfilename,":");  if ( string_array_size(sa) == 2 ) {    double act_thresh;    bool high_means_active;        char *filename = mk_filename_from_pfilename(pfilename,&act_thresh,                                                &high_means_active);    sp = mk_spardat_from_filename(filename,act_thresh,high_means_active);    free_string(filename);  }  else {    my_errorf( "mk_spardat_from_filename: filename='%s'\n"               "****   needs exactly one colon in order to be loaded\n"               "****   as a spardat", pfilename);  }  free_string_array(sa);  if (Verbosity >= 1) {    printf("spardat loaded from pfilename %s has...\n"	   "  %d rows, %d attributes, and %d non-zero input values.\n"	   "  It has %d positive output values.\n",	   pfilename,spardat_num_rows(sp),	   spardat_num_atts(sp),	   spardat_num_non_zero(sp),spardat_outval_to_num_rows(sp,1));  }  return sp;}precs *mk_precs_from_spardat( spardat *sp){  int numatts, numrows, i;  double dval;  ivec *factors;  prec *p;  precs *ps;  numatts = spardat_num_atts( sp);  numrows = spardat_num_rows( sp);  /* Make precs. */  ps = mk_empty_precs();  for (i=0; i<numrows; ++i) {    dval    = spardat_row_to_outval( sp, i);    factors = spardat_row_to_posatts( sp, i);    p = mk_prec( dval, factors);    add_to_precs( ps, p);    free_prec(p);  }  return ps;}void free_spardat(spardat *sp){  free_string_array(sp->attnum_to_name);  free_ivec_array(sp->attnum_to_rows);  free_ivec_array(sp->row_to_posatts);  free_ivec(sp->row_to_outval);  free_ivec_array(sp->outval_to_rows);  AM_FREE(sp,spardat);}spardat *mk_spardat_from_subset_of_rows(const spardat *sp,ivec *rows){  ivec_array *sub_row_to_posatts=mk_ivec_array_subset(sp->row_to_posatts,rows);  ivec *sub_row_to_outval = mk_ivec_subset(sp->row_to_outval,rows);  spardat *sub = mk_spardat(sp->attnum_to_name,			    sub_row_to_posatts,sub_row_to_outval);  free_ivec_array(sub_row_to_posatts);  free_ivec(sub_row_to_outval);  return sub;}  spardat *mk_spardat_from_subset_of_attnums( const spardat *sp, ivec *attnums){  int numrows, rowindex, attindex, att, newatt, numinv;  ivec *attinv, *row, *newrow;  string_array *sub_attnum_to_name;  ivec_array *sub_row_to_posatts;  spardat *sub;  /*    attinv is an ivec that maps an attnum to its position in attnums:      attnums[ attinv[ attnum]] = attnum  */  if (ivec_size(attnums) == 0 ) attinv = mk_ivec(0);  else attinv = mk_invert_nonneg_ivec( attnums);  numinv = ivec_size( attinv);  /* Create new attnames. */  sub_attnum_to_name = mk_string_array_subset( sp->attnum_to_name, attnums);  /* Build row array. */  numrows = spardat_num_rows( sp);  sub_row_to_posatts = mk_empty_ivec_array();  for (rowindex=0; rowindex<numrows; ++rowindex) {    row = spardat_row_to_posatts( sp, rowindex);    newrow = mk_ivec(0);    /* Renumber or drop atts of this row. */    for (attindex=0; attindex < ivec_size( row); ++attindex) {      att = ivec_ref( row, attindex);      if (att >= numinv) continue;      newatt = ivec_ref( attinv, att);      if (newatt < 0) continue;      add_to_ivec( newrow, newatt); /* Let's hope this isn't a bottleneck                                       in practice. */    }    add_to_ivec_array( sub_row_to_posatts, newrow);    free_ivec( newrow);  }  /* Make new spardat. */  sub = mk_spardat( sub_attnum_to_name, sub_row_to_posatts, sp->row_to_outval);  /* Clean up. */  free_ivec( attinv);  free_string_array( sub_attnum_to_name);  free_ivec_array( sub_row_to_posatts);  return sub;}int spardat_num_active_rows(const spardat *sp){  return ivec_size(spardat_outval_to_rows(sp,1));}/* Returns the number of rows mentioned in "rows" in which the   output is active *//* Takes time linear in rows and independent of number of active rows.   This is more efficient than if we computed size of intersection   of sp active rows and rows */int spardat_num_active_in_rows(const spardat *sp,ivec *rows){  int sum = 0;  int i;  for ( i = 0 ; i < ivec_size(rows) ; i++ )  {    int row = ivec_ref(rows,i);    if ( ivec_ref(sp->row_to_outval,row) == 1 )      sum += 1;  }  return sum;}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -