⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 looms.c

📁 留一模型选择法leave-one-out model selection
💻 C
字号:
#include <math.h>#include <ctype.h>#include <float.h>#include <string.h>#include <assert.h>#include <limits.h>#include <signal.h>#include "bsvm.h"#include "bsvm_util.h"#include "loo.h"static int cachesize, qpsize, verbosity;static int N;int *SV;BQP qp;double epsilon_train, epsilon_loo;double min_accuracy=0;int early_stop = 0; // a boolean variable indicating whether use early stopdouble c_upper=0, c_lower=0, c_prog=0;char c_prog_method = '+';double g_upper=0, g_lower=0, g_prog=0;char g_prog_method = '+';int s_seq;void terminating_signal_handler(int sig){	fprintf(stderr, "Terminated by %d\n", sig);	exit(0);	// just for flushing}void flush(int sig){	fprintf(stderr, "flushing..\n");	fflush(stdout);}int build_param_list(	double **plist, double lower, double upper, double prog, char method){	int num;	double param;	double *list;		// build the list of all parameters	if(method == '+')	{		int i;		num = (upper-lower)/prog+1;		list = (double*)xmalloc(sizeof(double)*(num+2));		list[0] = -1;		for(i=1, param=lower; i<=num; i++, param+=prog)		{			list[i] = param;		}		list[num+1] = -1;	}	else if(method == 'x')	{		int i;		num = 1 + log(upper/lower) / log(prog);		list = (double*)xmalloc(sizeof(double)*(num+2));		list[0] = -1;		for(i=1, param=lower; i<=num; i++, param*=prog)		{			list[i] = param;		}		list[num+1] = -1;	}	else	{		return -1;	}	*plist = list;	return num;}void exit_with_help(){	printf(		"Usage: looms [options] training_set_file [model_file]\n"	"options:\n"	"-a aclb : use early stop method with accuracy lower bond aclb\n"	"-c cost : set cost C of constraints violation\n"	"   cost := { c | c1-c2{x|+|-}c_prog }  ( no space allowed )\n"	"   ( default 1-512x2 )\n"	"-et epsilon : full training termination criterion tolerance (default 0.001)\n"	"-el epsilon : loo termination criterion tolerance (default = 0.1)\n"	"-g gamma : set gamma in kernel function\n"	"   gamma := { g | g1-g2{x|+|-}g_prog }  ( no space allowed )\n"	"   ( default 0.00025-2.048x2 )\n"	"-h : this help\n"	"-m cachesize : set cache memory size in MB (default 160)\n"	"-q qpsize : set subproblem size (default 10)\n"	"-s on/off: turns on/off S-shape sequence (default on)\n"	"-v {0,1,2,3} : verbosity (default 1)\n"	"   0 -- result of the whole model selection\n"	"   1 -- brief information on the loo of each parameter pair\n"	"   2 -- information on each loo phase\n"	"   3 -- information on each iteration of optimization\n"		);	exit(1);}int read_option(int argc, char **argv){	int i;	cachesize = 160;	qpsize = 10;	verbosity = 1;	c_lower = 1; c_upper = 512; c_prog = 2; c_prog_method = 'x';	g_lower = 0.00025; g_upper = 2.048; g_prog = 2; g_prog_method = 'x';	epsilon_train = 0.001;	epsilon_loo = 0.1;	s_seq = 1;	for (i=1;i<argc && argv[i][0]=='-';i+=2)	{		switch(argv[i][1])		{		case 'h':			exit_with_help();		case 'm':			cachesize = atoi(argv[i+1]);			break;		case 'q':			qpsize = atoi(argv[i+1]);			break;		case 'v':			verbosity = atoi(argv[i+1]);			break;		case 's':			if(strcasecmp(argv[i+1], "off")==0)				s_seq = 0;			break;		case 'c':		case 'g':		{			double upper=0, lower=0, prog=0;			char prog_method='+';			char *p1 = 0;			char *p2 = 0;						while(1)			{				p1 = strchr(argv[i+1], '-');				if(p1 == NULL) break;				*p1 = '\0';				p2=strpbrk(p1+1, "+-xX");				if(p2 == NULL) break;				break;			}						if(p1!=NULL)			{				if(p2==p1+1||p2==NULL)				{					if(argv[i][1] == 'c')						myerror("Incorrect range format or bad value for C\n");					else						myerror("Incorrect range format or bad value for gamma\n");				}								lower = atof(argv[i+1]);				prog_method = tolower(*p2);				*p2 = '\0';				upper = atof(p1+1);				prog = atof(p2+1);								if(prog_method == '-')				{					prog_method = '+';					prog = -prog;				}				if(lower <= 0 || upper <= 0 || prog == 0)				{					if(argv[i][1] == 'c')						myerror("Incorrect range format or bad value for C\n");					else						myerror("Incorrect range format or bad value for gamma\n");				}								assert(prog!=0);								if(prog_method == 'x' && (prog == 1||prog <= 0))				{					myerror("progress error when using multiplication progress\n");				}								assert( (prog_method=='x'&&prog!=1&&prog>0) ||						(prog_method=='+'&&prog!=0) );				if((prog_method=='x' && prog<1) ||					(prog_method=='+' && prog<0))				{					double max = (upper>lower?upper:lower);					upper = (upper<lower?upper:lower);					lower = max;				}				else				{					double max = (upper>lower?upper:lower);					lower = (upper<lower?upper:lower);					upper = max;				}				assert(prog_method=='x'&&(upper-lower)*(prog-1)>0 ||						prog_method=='+'&&(upper-lower)*prog>0 ||						upper == lower);			}			else	// single value			{				lower = upper = atof(argv[i+1]);				if(lower<=0)					myerror("Incorrect range format or bad value for C\n");				prog = 2;				prog_method = 'x';			}			if(argv[i][1] == 'c')	// C			{				c_upper = upper;				c_lower = lower;				c_prog = prog;				c_prog_method = prog_method;			}			else	// gamma			{				g_upper = upper;				g_lower = lower;				g_prog = prog;				g_prog_method = prog_method;			}			break;		}		case 'a':			min_accuracy = atof(argv[i+1]);			early_stop = 1;			break;		case 'e':			if(argv[i][2] == 't')			{				epsilon_train = atof(argv[i+1]);				break;			}			else if(argv[i][2] == 'l')			{				epsilon_loo = atof(argv[i+1]);				break;			}			// fall through if not '-et' nor '-el'		default:			fprintf(stderr, "%s: illegal option -- %c\n", argv[0], argv[i][1]);			exit_with_help();		}	}	if (qpsize < MINQPSIZE || qpsize > MAXQPSIZE)		myerror("Size of QP-subproblems is invalid");	if (c_upper==0)		myerror("C must be greater than 0");	if (g_upper==0)		myerror("gamma must be greater than 0");	if (epsilon_train <= 0 || epsilon_loo <= 0)		myerror("Epsilon must be greater than 0");	if (i == argc)		exit_with_help();	return i;}int main(int argc, char **argv){	FILE *fset, *fmodel=0;	int i = read_option(argc, argv);	int j;	int loo_miss;	double start_time = get_utime();	double *c_list, *g_list;	int num_c, num_g;	int j_low, j_up, j_step;	double opt_rate=0, opt_c, opt_gamma;	int early_stop_num = 0;	loo_param_t loo_param;	int max_test_error;	double *optimal_x;	fset = fopen(argv[i], "r");	if(fset==NULL)		myerror("Cannot open datafile\n");	if (i+1 < argc)	{		fmodel = fopen(argv[i+1], "w");	}	else	{		fmodel = fopen("model", "w");	}		if (!fmodel)		myerror("Cannot open modelfile\n");	N = initialize_loo(fset, cachesize, qpsize);	fclose(fset);	max_test_error = N*(1.0-min_accuracy);	optimal_x = (double*)xmalloc(sizeof(double)*N);	signal(SIGTERM, terminating_signal_handler);	signal(SIGHUP, terminating_signal_handler);	signal(SIGINT, terminating_signal_handler);	signal(SIGQUIT, terminating_signal_handler);	signal(SIGUSR1, flush);	start_time = get_utime();	if(0>(num_c=build_param_list(&c_list, c_lower, c_upper, c_prog, c_prog_method)))	{		myerror("Internal error: c_prog_method\n");	}	if(0>(num_g=build_param_list(&g_list, g_lower, g_upper, g_prog, g_prog_method)))	{		myerror("Internal error: g_prog_method\n");	}	j_low = 1;	j_up = num_c;	j_step = 1;	loo_param.kp.ktype = RBF_KERNEL;	loo_param.has_rate_bound = early_stop;	loo_param.max_test_error = max_test_error;	loo_param.epsilon_train = epsilon_train;	loo_param.epsilon_loo = epsilon_loo;	loo_param.verbosity = verbosity;	for(i=1; i<=num_g; i++)	{		reset_x();		for(j=j_low; j_step*j<=j_step*j_up; j+=j_step)		{			loo_param.C = c_list[j];			loo_param.kp.gamma = g_list[i];			alpha_seeding(c_list[j-j_step], c_list[j]);			if(verbosity >= 1)				printf("<C=%f,g=%f>\n", c_list[j], g_list[i]);			loo_miss = loo(&loo_param);						if(loo_miss>N)			{				// early stopped								if(early_stop)				{					early_stop_num++;					if(verbosity>=1)						printf("Cross validation accuracy = -1.00%% (early stopped)\n");				}				else					myerror("Internal error: negative loo rate\n");			}			else			{				double rate = 1-loo_miss/(double)N;				if(verbosity>=1)					printf("Cross validation accuracy = %5.3f%%\n", rate*100);				if(opt_rate < rate)				{					opt_rate = rate;					opt_c = c_list[j];					opt_gamma = g_list[i];					memcpy(optimal_x, get_x(), sizeof(optimal_x[0])*N);				}			}			if(early_stop && loo_miss<max_test_error)			{				loo_param.max_test_error = loo_miss;			}		}				if(s_seq)		{			// invert C loop			int temp = j_low;			j_low = j_up;			j_up = temp;			j_step = -j_step;		}		else		{			clear_cache();		}	}		if(verbosity >= 0)	{		printf("model selection completed, total time %f\n",				get_utime()-start_time);	}	if(early_stop_num < num_c*num_g)	{		if(verbosity >= 0)			printf("Optimal parameter: c=%f, gamma=%f, rate=%7.3f%%\n",					opt_c, opt_gamma, opt_rate*100);		if(fmodel)		{			int Nsv;			kernel_param_t kernel_param;						kernel_param.ktype = RBF_KERNEL;			kernel_param.degree = 0;			kernel_param.gamma = opt_gamma;			kernel_param.coef0 = 0;						Nsv = 0;			for(j=0; j<N; j++)			{				if(optimal_x[j] > ZERO)					Nsv++;			}			output_model(fmodel, Nsv, optimal_x, &kernel_param);		}	}	else	{		printf("No loo reaches the rate lbound\n");	}			free_loo();	free(c_list);	free(g_list);	free(optimal_x);	return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -