📄 spardat.c
字号:
/*
   Logistic Regression using Truncated Iteratively Re-weighted Least Squares
   (includes several programs)
   Copyright (C) 2005  Paul Komarek

   This program is free software; you can redistribute it and/or modify
   it under the terms of the GNU General Public License as published by
   the Free Software Foundation; either version 2 of the License, or
   (at your option) any later version.

   This program is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
   GNU General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

   Author: Paul Komarek, komarek@cmu.edu
   Alternate contact: Andrew Moore, awm@cs.cmu.edu
*/

/*
   File:        spardat.c
   Author:      Andrew W. Moore
   Created:     Fri Feb 15 14:06:41 EST 2002
   Description: Sparse categorical dataset

   Copyright 2002, The Auton Lab, CMU
*/

#include "amiv.h"
#include "amdyv.h"
#include "spardat.h"

/* Number of attributes, taken as the size of sp->attnum_to_name. */
int spardat_num_atts(const spardat *sp)
{
  return string_array_size(sp->attnum_to_name);
}

/* Number of rows, taken as the size of sp->row_to_posatts. */
int spardat_num_rows(const spardat *sp)
{
  return ivec_array_size(sp->row_to_posatts);
}

/* Returns the name stored for attribute attnum. The pointer comes straight
   from string_array_ref(); presumably it is owned by sp and must not be
   freed by the caller (TODO confirm against string_array_ref). */
char *spardat_attnum_to_name(const spardat *sp,int attnum)
{
  return string_array_ref(sp->attnum_to_name,attnum);
}

/* Returns the ivec of rows in which attribute attnum is positive.
   This is the entry of sp->attnum_to_rows itself, not a copy made here. */
ivec *spardat_attnum_to_posrows(const spardat *sp,int attnum)
{
  return ivec_array_ref(sp->attnum_to_rows,attnum);
}

/* Number of rows in which attribute attnum is positive. */
int spardat_attnum_to_num_posrows(const spardat *sp,int attnum)
{
  return ivec_size(spardat_attnum_to_posrows(sp,attnum));
}

/* Returns the ivec of positive attribute numbers for the given row.
   This is the entry of sp->row_to_posatts itself, not a copy made here. */
ivec *spardat_row_to_posatts(const spardat *sp,int row)
{
  return ivec_array_ref(sp->row_to_posatts,row);
}

/* Number of positive attributes in the given row. */
int spardat_row_to_num_posatts(const spardat *sp,int row)
{
  return ivec_size(spardat_row_to_posatts(sp,row));
}

/* Output value recorded for the given row. */
int spardat_row_to_outval(const spardat *sp,int row)
{
  return ivec_ref(sp->row_to_outval,row);
}

/* Number of rows whose output value equals outval (0 or 1). */
int spardat_outval_to_num_rows(const spardat *sp,int outval)
{
  return ivec_size(spardat_outval_to_rows(sp,outval));
}

/* Returns the ivec of rows whose output value equals outval.
   outval must be 0 or 1; this is checked with my_assert. */
ivec *spardat_outval_to_rows(const spardat *sp,int outval)
{
  my_assert(outval == 0 || outval == 1);
  return ivec_array_ref(sp->outval_to_rows,outval);
}

/* Size of the intersection between `rows` and the rows in which attribute
   attnum is positive. Uses size_of_sivec_intersection(), so both ivecs are
   presumably expected to be sorted sets (TODO confirm). */
int num_posrows_in_subset(const spardat *sp,ivec *rows,int attnum)
{
  return size_of_sivec_intersection(spardat_attnum_to_posrows(sp,attnum),
                                    rows);
}

/* Makes the intersection between `rows` and the rows in which attribute
   attnum is positive. The result comes from mk_sivec_intersection(); by the
   mk_ naming convention used in this file the caller presumably frees it. */
ivec *mk_posrows_subset(const spardat *sp,ivec *rows,int attnum)
{
  return mk_sivec_intersection(spardat_attnum_to_posrows(sp,attnum),rows);
}

/* Makes a dyv with one entry per row of sp. Entry `row` is
   dyv_partial_sum(dv, posatts), where posatts is the list of positive
   attributes of that row. */
dyv *mk_spardat_times_dyv( const spardat *sp, const dyv *dv)
{
  int numrows, row;
  double sum;
  ivec *posatts;
  dyv *r_dv;

  numrows = spardat_num_rows( sp);
  r_dv = mk_dyv( numrows);

  for (row=0; row<numrows; ++row) {
    posatts = spardat_row_to_posatts( sp, row);
    sum = dyv_partial_sum( dv, posatts);
    dyv_set( r_dv, row, sum);
  }

  return r_dv;
}

/* Makes a dyv with one entry per attribute of sp. Entry `att` is
   dyv_partial_sum(dv, posrows), where posrows is the list of rows in which
   that attribute is positive. */
dyv *mk_spardat_transpose_times_dyv( const spardat *sp, const dyv *dv)
{
  int numatts, att;
  double sum;
  ivec *posrows;
  dyv *r_dv;

  numatts = spardat_num_atts( sp);
  r_dv = mk_dyv( numatts);

  for (att=0; att<numatts; ++att) {
    posrows = spardat_attnum_to_posrows( sp, att);
    sum = dyv_partial_sum( dv, posrows);
    dyv_set( r_dv, att, sum);
  }

  return r_dv;
}

/* Makes an ivec_array holding, for each row of ps in order, the factor list
   returned by precs_row_to_factors(). */
ivec_array *mk_row_to_posatts_from_precs(precs *ps)
{
  ivec_array *row_to_posatts = mk_empty_ivec_array();
  /* Row count is loop-invariant; read it once, as
     mk_row_to_outval_from_precs() does. */
  int num_rows = precs_num_rows(ps);
  int row;

  for ( row = 0 ; row < num_rows ; row++ )
  {
    ivec *posatts = precs_row_to_factors(ps,row);
    add_to_ivec_array(row_to_posatts,posatts);
  }

  return row_to_posatts;
}

/* Makes an ivec with one 0/1 output value per row of ps.
   A row is "active" (output 1) when its activation is >= act_thresh if
   high_means_active is true, or <= act_thresh otherwise. */
ivec *mk_row_to_outval_from_precs(precs *ps,double act_thresh,
                                  bool high_means_active)
{
  int num_rows = precs_num_rows(ps);
  ivec *row_to_outval = mk_ivec(num_rows);
  int row;

  for ( row = 0 ; row < num_rows ; row++ )
  {
    double activation = precs_row_to_activation(ps,row);
    bool active;
    int outval;

    if ( high_means_active )
      active = activation >= act_thresh;
    else
      active = activation <= act_thresh;

    outval = (active) ? 1 : 0;
    ivec_set(row_to_outval,row,outval);
  }

  return row_to_outval;
}

/* Prints every field of x to stream s. Each field is labelled
   "<m1> -> <fieldname>", and m2 is forwarded unchanged to the underlying
   fprintf_* routine. */
void fprintf_spardat(FILE *s,char *m1,spardat *x,char *m2)
{
  char *buff;

  buff = mk_printf("%s -> attnum_to_name",m1);
  fprintf_string_array(s,buff,x->attnum_to_name,m2);
  free_string(buff);

  buff = mk_printf("%s -> attnum_to_rows",m1);
  fprintf_ivec_array(s,buff,x->attnum_to_rows,m2);
  free_string(buff);

  buff = mk_printf("%s -> row_to_posatts",m1);
  fprintf_ivec_array(s,buff,x->row_to_posatts,m2);
  free_string(buff);

  buff = mk_printf("%s -> row_to_outval",m1);
  fprintf_ivec(s,buff,x->row_to_outval,m2);
  free_string(buff);

  buff = mk_printf("%s -> outval_to_rows",m1);
  fprintf_ivec_array(s,buff,x->outval_to_rows,m2);
  free_string(buff);
}

/* Returns the number of non-zero values in the sparse matrix of inputs */
int spardat_num_non_zero(spardat *sp)
{
  return sum_of_ivec_array_sizes(sp->row_to_posatts);
}

/* Makes a spardat from attribute names, per-row positive-attribute lists and
   per-row output values.

   attnum_to_name: one name per attribute; copied into the result.
   row_to_posatts: for each row, the attribute numbers that are positive;
                   copied into the result.
   row_to_outval:  for each row, the output value, which must be 0 or 1;
                   copied into the result.

   row_to_posatts and row_to_outval must have the same number of entries;
   otherwise my_errorf() is called. The inverse indices attnum_to_rows and
   outval_to_rows are built here by scanning rows in increasing order. */
spardat *mk_spardat(string_array *attnum_to_name,
                    const ivec_array *row_to_posatts,
                    const ivec *row_to_outval)
{
  spardat *sp = AM_MALLOC(spardat);
  int num_atts = string_array_size(attnum_to_name);
  int row;
  int num_rows = ivec_array_size(row_to_posatts);

  if ( ivec_size(row_to_outval) != num_rows )
  {
    /* Argument order matters: the first %d is the factor count
       (row_to_posatts), the second is the activation count
       (row_to_outval). */
    my_errorf("The factor file contains information about %d compounds.\n"
              "The activation file contains information about %d\n"
              "activations. There should be the same number of records\n"
              "in each.\n",
              ivec_array_size(row_to_posatts),
              ivec_size(row_to_outval));
  }

  if (Verbosity >= 3) printf("Making spardat row_to_posatts...\n");
  sp->row_to_posatts = mk_copy_ivec_array(row_to_posatts);

  if (Verbosity >= 3) printf("Making spardat row_to_outval...\n");
  sp->row_to_outval = mk_copy_ivec(row_to_outval);

  if (Verbosity >= 3) printf("Making spardat attnum_to_rows...\n");
  sp->attnum_to_rows = mk_array_of_zero_length_ivecs(num_atts);

  /* Invert row -> attributes into attribute -> rows. */
  for ( row = 0 ; row < num_rows ; row++ )
  {
    ivec *posatts = ivec_array_ref(row_to_posatts,row);
    int i;
    for ( i = 0 ; i < ivec_size(posatts) ; i++ )
    {
      int attnum = ivec_ref(posatts,i);
      ivec *rows_for_this_attnum;

      /* Every attribute number must have a name, and so a slot here. */
      my_assert(attnum >= 0 &&
                attnum < ivec_array_size(sp->attnum_to_rows));
      rows_for_this_attnum = ivec_array_ref(sp->attnum_to_rows,attnum);
      add_to_ivec(rows_for_this_attnum,row);
    }
  }

  if (Verbosity >= 3) printf("Making spardat attnum_to_names...\n");
  sp -> attnum_to_name = mk_copy_string_array(attnum_to_name);

  if (Verbosity >= 3) printf("Making spardat outval_to_rows...\n");
  sp -> outval_to_rows = mk_array_of_zero_length_ivecs(2);

  /* Invert row -> output value into output value -> rows. */
  for ( row = 0 ; row < num_rows ; row++ )
  {
    int outval = ivec_ref(row_to_outval,row);
    ivec *rows_for_this_outval;

    /* outval_to_rows has exactly two slots; same contract as
       spardat_outval_to_rows(). */
    my_assert(outval == 0 || outval == 1);
    rows_for_this_outval = ivec_array_ref(sp->outval_to_rows,outval);
    add_to_ivec(rows_for_this_outval,row);
  }

  my_assert(string_array_size(attnum_to_name) ==
            ivec_array_size(sp->attnum_to_rows));

  if (Verbosity >= 3)
  {
    printf("...entire spardat constructed\n");
    printf("spardat has %d rows, %d attributes, and %d non-zero "
           "input values\n",
           spardat_num_rows(sp),
           spardat_num_atts(sp),
           spardat_num_non_zero(sp));
  }

  return sp;
}

/* Makes a copy of sp in which each of the five fields is duplicated with the
   matching mk_copy_* routine. */
spardat *mk_copy_spardat(const spardat *sp)
{
  spardat *spcopy;

  spcopy = AM_MALLOC( spardat);
  spcopy->attnum_to_name = mk_copy_string_array( sp->attnum_to_name);
  spcopy->attnum_to_rows = mk_copy_ivec_array( sp->attnum_to_rows);
  spcopy->row_to_posatts = mk_copy_ivec_array( sp->row_to_posatts);
  spcopy->row_to_outval  = mk_copy_ivec( sp->row_to_outval);
  spcopy->outval_to_rows = mk_copy_ivec_array( sp->outval_to_rows);

  return spcopy;
}

/* Makes a spardat whose attribute names are generated from the attribute
   numbers 0 .. ivec_array_max_value(row_to_posatts), using
   mk_string_array_from_ivec(). The temporary name array and number ivec are
   freed before returning. */
spardat *mk_spardat_with_default_attnames(ivec_array *row_to_posatts,
                                          ivec *row_to_outval)
{
  ivec *all_attnums =
    mk_identity_ivec(1+ivec_array_max_value(row_to_posatts));
  string_array *attnames = mk_string_array_from_ivec(all_attnums);
  spardat *sp = mk_spardat(attnames,row_to_posatts,row_to_outval);

  free_string_array(attnames);
  free_ivec(all_attnums);

  return sp;
}

/* Makes a spardat from a precs: positive attributes come from each row's
   factors, output values from thresholding each row's activation (see
   mk_row_to_outval_from_precs), and attribute names are the defaults. */
spardat *mk_spardat_from_precs(precs *ps,double act_thresh,
                               bool high_means_active)
{
  ivec_array *row_to_posatts = mk_row_to_posatts_from_precs(ps);
  ivec *row_to_outval = mk_row_to_outval_from_precs(ps,act_thresh,
                                                    high_means_active);
  spardat *sp = mk_spardat_with_default_attnames(row_to_posatts,row_to_outval);

  free_ivec(row_to_outval);
  free_ivec_array(row_to_posatts);

  return sp;
}

/* Loads spardat using (first) afc pfile pformat (via precs). */
spardat *mk_spardat_from_filename(char *filename,double act_thresh,
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -