
📄 svm.cpp

📁 Language, Script, and Encoding Identification with String Kernel Classifiers
💻 CPP
📖 Page 1 of 5
	if(model->probA) // regression has probA only
	{
		fprintf(fp, "probA");
		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
			fprintf(fp," %g",model->probA[i]);
		fprintf(fp, "\n");
	}
	if(model->probB)
	{
		fprintf(fp, "probB");
		for(int i=0;i<nr_class*(nr_class-1)/2;i++)
			fprintf(fp," %g",model->probB[i]);
		fprintf(fp, "\n");
	}
	if(model->nSV)
	{
		fprintf(fp, "nr_sv");
		for(int i=0;i<nr_class;i++)
			fprintf(fp," %d",model->nSV[i]);
		fprintf(fp, "\n");
	}
	fprintf(fp, "SV\n");
	const double * const *sv_coef = model->sv_coef;
	const svm_node * const *SV = model->SV;

	for(int i=0;i<l;i++)
	{
		for(int j=0;j<nr_class-1;j++)
			fprintf(fp, "%.16g ",sv_coef[j][i]);

		const svm_node *p = SV[i];
		while(p->index != -1)
		{
			fprintf(fp,"%d:%.8g ",p->index,p->value);
			p++;
		}
		fprintf(fp, "\n");
	}
	fclose(fp);
	return 0;
}

svm_model *svm_load_model(const char *model_file_name)
{
	FILE *fp = fopen(model_file_name,"rb");
	if(fp==NULL) return NULL;

	// read parameters

	svm_model *model = Malloc(svm_model,1);
	svm_parameter& param = model->param;
	model->rho = NULL;
	model->probA = NULL;
	model->probB = NULL;
	model->label = NULL;
	model->nSV = NULL;

	char cmd[81];
	while(1)
	{
		fscanf(fp,"%80s",cmd);

		if(strcmp(cmd,"svm_type")==0)
		{
			fscanf(fp,"%80s",cmd);
			int i;
			for(i=0;svm_type_table[i];i++)
			{
				if(strcmp(svm_type_table[i],cmd)==0)
				{
					param.svm_type=i;
					break;
				}
			}
			if(svm_type_table[i] == NULL)
			{
				fprintf(stderr,"unknown svm type.\n");
				free(model->rho);
				free(model->label);
				free(model->nSV);
				free(model);
				return NULL;
			}
		}
		else if(strcmp(cmd,"kernel_type")==0)
		{
			fscanf(fp,"%80s",cmd);
			int i;
			for(i=0;kernel_type_table[i];i++)
			{
				if(strcmp(kernel_type_table[i],cmd)==0)
				{
					param.kernel_type=i;
					break;
				}
			}
			if(kernel_type_table[i] == NULL)
			{
				fprintf(stderr,"unknown kernel function.\n");
				free(model->rho);
				free(model->label);
				free(model->nSV);
				free(model);
				return NULL;
			}
		}
		else if(strcmp(cmd,"degree")==0)
			fscanf(fp,"%lf",&param.degree);
		else if(strcmp(cmd,"gamma")==0)
			fscanf(fp,"%lf",&param.gamma);
		else if(strcmp(cmd,"coef0")==0)
			fscanf(fp,"%lf",&param.coef0);
		else if(strcmp(cmd,"nr_class")==0)
			fscanf(fp,"%d",&model->nr_class);
		else if(strcmp(cmd,"total_sv")==0)
			fscanf(fp,"%d",&model->l);
		else if(strcmp(cmd,"rho")==0)
		{
			int n = model->nr_class * (model->nr_class-1)/2;
			model->rho = Malloc(double,n);
			for(int i=0;i<n;i++)
				fscanf(fp,"%lf",&model->rho[i]);
		}
		else if(strcmp(cmd,"label")==0)
		{
			int n = model->nr_class;
			model->label = Malloc(int,n);
			for(int i=0;i<n;i++)
				fscanf(fp,"%d",&model->label[i]);
		}
		else if(strcmp(cmd,"probA")==0)
		{
			int n = model->nr_class * (model->nr_class-1)/2;
			model->probA = Malloc(double,n);
			for(int i=0;i<n;i++)
				fscanf(fp,"%lf",&model->probA[i]);
		}
		else if(strcmp(cmd,"probB")==0)
		{
			int n = model->nr_class * (model->nr_class-1)/2;
			model->probB = Malloc(double,n);
			for(int i=0;i<n;i++)
				fscanf(fp,"%lf",&model->probB[i]);
		}
		else if(strcmp(cmd,"nr_sv")==0)
		{
			int n = model->nr_class;
			model->nSV = Malloc(int,n);
			for(int i=0;i<n;i++)
				fscanf(fp,"%d",&model->nSV[i]);
		}
		else if(strcmp(cmd,"SV")==0)
		{
			while(1)
			{
				int c = getc(fp);
				if(c==EOF || c=='\n') break;
			}
			break;
		}
		else
		{
			fprintf(stderr,"unknown text in model file\n");
			free(model->rho);
			free(model->label);
			free(model->nSV);
			free(model);
			return NULL;
		}
	}

	// read sv_coef and SV

	int elements = 0;
	long pos = ftell(fp);

	while(1)
	{
		int c = fgetc(fp);
		switch(c)
		{
			case '\n':
				// count the '-1' element
			case ':':
				++elements;
				break;
			case EOF:
				goto out;
			default:
				;
		}
	}
out:
	fseek(fp,pos,SEEK_SET);

	int m = model->nr_class - 1;
	int l = model->l;
	model->sv_coef = Malloc(double *,m);
	int i;
	for(i=0;i<m;i++)
		model->sv_coef[i] = Malloc(double,l);
	model->SV = Malloc(svm_node*,l);
	svm_node *x_space=NULL;
	if(l>0) x_space = Malloc(svm_node,elements);

	int j=0;
	for(i=0;i<l;i++)
	{
		model->SV[i] = &x_space[j];
		for(int k=0;k<m;k++)
			fscanf(fp,"%lf",&model->sv_coef[k][i]);
		while(1)
		{
			int c;
			do {
				c = getc(fp);
				if(c=='\n') goto out2;
			} while(isspace(c));
			ungetc(c,fp);
			fscanf(fp,"%d:%lf",&(x_space[j].index),&(x_space[j].value));
			++j;
		}
out2:
		x_space[j++].index = -1;
	}

	fclose(fp);

	model->free_sv = 1;	// XXX
	return model;
}

void svm_destroy_model(svm_model* model)
{
	if(model->free_sv && model->l > 0)
		free((void *)(model->SV[0]));
	for(int i=0;i<model->nr_class-1;i++)
		free(model->sv_coef[i]);
	free(model->SV);
	free(model->sv_coef);
	free(model->rho);
	free(model->label);
	free(model->probA);
	free(model->probB);
	free(model->nSV);
	free(model);
}

void svm_destroy_param(svm_parameter* param)
{
	free(param->weight_label);
	free(param->weight);
}

const char *svm_check_parameter(const svm_problem *prob, const svm_parameter *param)
{
	// svm_type

	int svm_type = param->svm_type;
	if(svm_type != C_SVC &&
	   svm_type != NU_SVC &&
	   svm_type != ONE_CLASS &&
	   svm_type != EPSILON_SVR &&
	   svm_type != NU_SVR)
		return "unknown svm type";

	// kernel_type

	int kernel_type = param->kernel_type;
	if(kernel_type != LINEAR &&
	   kernel_type != POLY &&
	   kernel_type != RBF &&
// Canasai's addition begin
	   kernel_type != STRING &&
// Canasai's addition end
	   kernel_type != SIGMOID)
		return "unknown kernel type";

// Canasai's addition begin
	if( param->gamma < 0 || param->gamma > 3 )
		return "unknown string kernel type: gamma < 0 or gamma > 3";

	if( param->mc_method != 0 && param->mc_method != 1 )
		return "unknown multi-class classification method: mc_method != 0 and mc_method != 1";
// Canasai's addition end

	// cache_size,eps,C,nu,p,shrinking

	if(param->cache_size <= 0)
		return "cache_size <= 0";

	if(param->eps <= 0)
		return "eps <= 0";

	if(svm_type == C_SVC ||
	   svm_type == EPSILON_SVR ||
	   svm_type == NU_SVR)
		if(param->C <= 0)
			return "C <= 0";

	if(svm_type == NU_SVC ||
	   svm_type == ONE_CLASS ||
	   svm_type == NU_SVR)
		if(param->nu < 0 || param->nu > 1)
			return "nu < 0 or nu > 1";

	if(svm_type == EPSILON_SVR)
		if(param->p < 0)
			return "p < 0";

	if(param->shrinking != 0 &&
	   param->shrinking != 1)
		return "shrinking != 0 and shrinking != 1";

	if(param->probability != 0 &&
	   param->probability != 1)
		return "probability != 0 and probability != 1";

	if(param->probability == 1 &&
	   svm_type == ONE_CLASS)
		return "one-class SVM probability output not supported yet";

	// check whether nu-svc is feasible

	if(svm_type == NU_SVC)
	{
		int l = prob->l;
		int max_nr_class = 16;
		int nr_class = 0;
		int *label = Malloc(int,max_nr_class);
		int *count = Malloc(int,max_nr_class);

		int i;
		for(i=0;i<l;i++)
		{
			int this_label = (int)prob->y[i];
			int j;
			for(j=0;j<nr_class;j++)
				if(this_label == label[j])
				{
					++count[j];
					break;
				}
			if(j == nr_class)
			{
				if(nr_class == max_nr_class)
				{
					max_nr_class *= 2;
					label = (int *)realloc(label,max_nr_class*sizeof(int));
					count = (int *)realloc(count,max_nr_class*sizeof(int));
				}
				label[nr_class] = this_label;
				count[nr_class] = 1;
				++nr_class;
			}
		}

		for(i=0;i<nr_class;i++)
		{
			int n1 = count[i];
			for(int j=i+1;j<nr_class;j++)
			{
				int n2 = count[j];
				if(param->nu*(n1+n2)/2 > min(n1,n2))
				{
					free(label);
					free(count);
					return "specified nu is infeasible";
				}
			}
		}
		free(label);
		free(count);
	}

	return NULL;
}

int svm_check_probability_model(const svm_model *model)
{
	return ((model->param.svm_type == C_SVC || model->param.svm_type == NU_SVC) &&
		model->probA!=NULL && model->probB!=NULL) ||
		((model->param.svm_type == EPSILON_SVR || model->param.svm_type == NU_SVR) &&
		 model->probA!=NULL);
}

double dagsvm_predict(const struct svm_model *model, const struct svm_node *x)
{
	if(model->param.svm_type == ONE_CLASS ||
	   model->param.svm_type == EPSILON_SVR ||
	   model->param.svm_type == NU_SVR)
	{
		double *sv_coef = model->sv_coef[0];
		double sum = 0;
		for(int i=0;i<model->l;i++)
			sum += sv_coef[i] * Kernel::k_function(x,model->SV[i],model->param);
		sum -= model->rho[0];
		if(model->param.svm_type == ONE_CLASS)
			return (sum>0)?1:-1;
		else
			return sum;
	}
	else
	{
		int i;
		int nr_class = model->nr_class;
		int l = model->l;

		double *kvalue = Malloc(double,l);
		for(i=0;i<l;i++)
			kvalue[i] = Kernel::k_function(x,model->SV[i],model->param);

		int *start = Malloc(int,nr_class);
		start[0] = 0;
		for(i=1;i<nr_class;i++)
			start[i] = start[i-1]+model->nSV[i-1];

		i = 0;
		int j = nr_class - 1;
		while (i < j)
		{
			double sum = 0;
			int si = start[i];
			int sj = start[j];
			int ci = model->nSV[i];
			int cj = model->nSV[j];

			int k;
			double *coef1 = model->sv_coef[j-1];
			double *coef2 = model->sv_coef[i];
			for(k=0;k<ci;k++)
				sum += coef1[si+k] * kvalue[si+k];
			for(k=0;k<cj;k++)
				sum += coef2[sj+k] * kvalue[sj+k];
			sum -= model->rho[i*(nr_class - 1) -
				i*(i + 1)/2 + j - 1];

			if(sum > 0)
				j--;
			else
				i++;
		}

		free(kvalue);
		free(start);
		return model->label[i];
	}
}

// Canasai's addition begin
char **load_label( char *label_file, int *max_classname_length )
{
    int num_tokens   = 0;
    int num_lines    = 0;
    int longest_line = 0;

    text_scan( label_file, &num_tokens, &num_lines, &longest_line );

    char *line = Malloc( char, longest_line );
    char **label = Malloc( char *, num_lines );

    FILE *file_ptr = fopen( label_file, "r" );
    while( fgets( line, longest_line, file_ptr ) != NULL )
    {
        if( text_not_blank( line ) )
        {
            line[ strlen( line ) - 1 ] = '\0';
            char *tmp_ptr = text_copy4( line );
            char *tmp_ptr2 = tmp_ptr;

            tmp_ptr2 = strchr( tmp_ptr2, ' ' );
            if( tmp_ptr2 == NULL )
            {
                fprintf( stderr, "ERROR: read label file, %s\n", label_file );
                exit( 0 );
            }

            *tmp_ptr2 = '\0';
            ++tmp_ptr2;

            int index = ( int )atof( tmp_ptr );
            label[index] = text_copy( tmp_ptr2 );

            int label_length = strlen( label[index] );
            if( label_length > *max_classname_length )
                *max_classname_length = label_length;

            free( tmp_ptr );
        }
    }

    free( line );
    fclose( file_ptr );

    return( label );
}

// modified from stratified cross validation
void sk_svm_cross_validation( const struct svm_problem *prob,
                              const struct svm_parameter *param,
                              double *target,
                              char **label_name,
                              int max_classname_length,
                              int *nr_class_ )
{
    int i;
    int nr_fold = param->nr_fold;
    int *fold_start = Malloc( int, nr_fold + 1 );
    int l = prob->l;
    int *perm = Malloc( int, l );
    int nr_class;

    if( param->svm_type == C_SVC /*|| param->svm_type == NU_SVC*/ )
    {
        int *start = NULL;
        int *label = NULL;
        int *count = NULL;
        svm_group_classes( prob, &nr_class, &label, &start, &count, perm );

        (*nr_class_) = nr_class;

        // random shuffle and then data grouped by fold using the array perm
        int *fold_count = Malloc( int, nr_fold );
        int c;
        int *index = Malloc( int, l );
        for( i = 0; i < l; i++ )
            index[i] = perm[i];
/*
        for( c = 0; c < nr_class; c++ )
        {
            for( i = 0; i < count[c]; i++ )
            {
                int j = i + rand() % ( count[c] - i );
                swap( index[start[c]+j], index[start[c]+i] );
            }
        }
*/
        for( i = 0; i < nr_fold; i++ )
        {
            fold_count[i] = 0;
            for( c = 0; c < nr_class; c++ )
                fold_count[i] += ( i+1 ) * count[c] / nr_fold - i * count[c] / nr_fold;
        }
        fold_start[0] = 0;
        for( i = 1; i <= nr_fold; i++ )
            fold_start[i] = fold_start[i-1] + fold_count[i-1];
        for( c = 0; c < nr_class; c++ )
        {
            for( i = 0; i < nr_fold; i++ )
            {
                int begin = start[c] + i * count[c] / nr_fold;
                int end = start[c] + ( i+1 ) * count[c] / nr_fold;
                for( int j = begin; j < end; j++ )
                {
                    perm[fold_start[i]] = index[j];
                    fold_start[i]++;
                }
            }
        }
        fold_start[0] = 0;
        for( i = 1; i <= nr_fold; i++ )
            fold_start[i] = fold_start[i-1] + fold_count[i-1];
        free( start );
        free( label );
        free( count );
        free( index );
        free( fold_count );
    }
    else
    {
        for( i = 0; i < l; i++ )
            perm[i] = i;
        for( i = 0; i < l; i++ )
        {
            int j = i + rand() % ( l-i );
            swap( perm[i], perm[j] );
        }
        for( i = 0; i <= nr_fold; i++ )
            fold_start[i] = i * l / nr_fold;
    }

    for( i = 0; i < nr_fold; i++ )
    {
        int begin = fold_start[i];
        int end = fold_start[i+1];
        int j, k;
        struct svm_problem subprob;

        subprob.l = l - ( end - begin );
        subprob.x = Malloc( struct svm_node *, subprob.l );
        subprob.y = Malloc( double, subprob.l );

        k = 0;
        for( j = 0; j < begin; j++ )
        {
            subprob.x[k] = prob->x[perm[j]];
            subprob.y[k] = prob->y[perm[j]];
            ++k;
        }
        for( j = end; j < l; j++ )
        {
            subprob.x[k] = prob->x[perm[j]];
            subprob.y[k] = prob->y[perm[j]];
            ++k;
        }
        struct svm_model *submodel = svm_train( &subprob, param );

        int **conf_mat = ( int ** )malloc( nr_class * sizeof( int * ) );
        for( j = 0; j < nr_class; j++ )
        {
            conf_mat[j] = ( int * )malloc( nr_class * sizeof( int ) );
            for( k = 0; k < nr_class; k++ )
                conf_mat[j][k]=0;
        }
        for( j = begin; j < end; j++ )
        {
            if( param->mc_method == ONE_AGAINST_ONE )
                target[perm[j]]
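
The listing above (page 1 of 5) covers model saving/loading, parameter checking, DAG-SVM prediction, and the string-kernel cross-validation additions. As a quick orientation, here is a minimal usage sketch; it is not part of the original file. It assumes the declarations from the accompanying svm.h (svm_model, svm_node, svm_load_model, dagsvm_predict, svm_destroy_model), and the model path "model.txt" plus the feature values are purely illustrative.

#include <cstdio>
#include "svm.h"   // assumed header distributed with this svm.cpp

int main()
{
	// Load a model previously written by svm_save_model; "model.txt" is a placeholder path.
	svm_model *model = svm_load_model("model.txt");
	if(model == NULL)
	{
		fprintf(stderr, "cannot open model file\n");
		return 1;
	}

	// A sparse feature vector in svm_node form, terminated by index = -1,
	// matching the format read and written by the routines above.
	svm_node x[4];
	x[0].index = 1; x[0].value = 0.5;
	x[1].index = 3; x[1].value = 1.0;
	x[2].index = 7; x[2].value = 0.25;
	x[3].index = -1; x[3].value = 0;

	// dagsvm_predict (defined above) evaluates the pairwise decision values
	// in DAG order and returns the surviving class label.
	double label = dagsvm_predict(model, x);
	printf("predicted label: %g\n", label);

	svm_destroy_model(model);
	return 0;
}

For a classification model, dagsvm_predict needs only the fields populated by svm_load_model (SV, sv_coef, rho, nSV, label), so the sketch exercises exactly the code paths shown on this page.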
