⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 svm_common.cpp

📁 目前的svm(支持向量机)分类算法开源实现如svmlight和svmlib都没有界面
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include "stdafx.h"
#include "svm.h"
#include "svm_common.h"
#include "math.h"


/*
 * Debug-build boilerplate: when _DEBUG is defined, THIS_FILE holds this
 * file's name and `new` is redirected to DEBUG_NEW.
 * NOTE(review): DEBUG_NEW is presumably the MFC allocation-tracking macro
 * pulled in through stdafx.h -- confirm.
 */
#ifdef _DEBUG
#undef THIS_FILE
static char THIS_FILE[]=__FILE__;
#define new DEBUG_NEW
#endif




/*
 * File-wide scratch buffer for status and error messages: functions in
 * this file sprintf() into it and then pass it to printm().  It is a
 * single shared 200-byte buffer and the call sites do no bounds checking.
 */
static char temstr[200];
/*
 * Evaluate the kernel decision function of `model` for the example `ex`.
 *
 * Accumulates kernel(supvec[i], ex) * alpha[i] for i = 1 .. sv_num-1 and
 * returns that sum minus the threshold model->b.  Slot 0 of the
 * support-vector arrays is never read here.
 */
double classify_example(MODEL *model,DOC *ex)
{
	double score = 0;
	long sv = 1;

	while (sv < model->sv_num) {
		score += kernel(&model->kernel_parm, model->supvec[sv], ex) * model->alpha[sv];
		sv++;
	}
	return score - model->b;
}

/*
 * Evaluate the decision function of a linear model for the example `ex`.
 *
 * Preconditions (not checked here):
 *   - model->lin_weights must already hold the linear weight vector;
 *   - every feature number in ex->words must be a valid index into that
 *     vector, because sprod_ns() indexes it directly.
 *
 * Returns dot(lin_weights, ex->words) - model->b.
 */
double classify_example_linear(MODEL *model,DOC *ex)
{
	double wx = sprod_ns(model->lin_weights, ex->words);

	return (double)(wx - model->b);
}

/*
 * Evaluate the kernel selected by kernel_parm->kernel_type on documents
 * `a` and `b`, where dot = sprod_ss(a->words, b->words):
 *
 *   0  linear      dot
 *   1  polynomial  pow(coef_lin*dot + coef_const, poly_degree)
 *   2  RBF         exp(-rbf_gamma * (a->twonorm_sq - 2*dot + b->twonorm_sq))
 *   3  sigmoid     tanh(coef_lin*dot + coef_const)
 *   4  custom      custom_kernel(kernel_parm, a, b)
 *
 * Any other type reports an error through printm() and yields -1.
 * Every call, including a failing one, increments
 * com_result.kernel_cache_statistic.
 */
CFLOAT kernel(KERNEL_PARM *kernel_parm,DOC *a,DOC*b)
{
	double dot;

	com_result.kernel_cache_statistic++;

	if (kernel_parm->kernel_type == 0) {
		/* linear */
		dot = sprod_ss(a->words, b->words);
		return (CFLOAT)dot;
	}
	if (kernel_parm->kernel_type == 1) {
		/* polynomial */
		dot = sprod_ss(a->words, b->words);
		return (CFLOAT)pow(kernel_parm->coef_lin * dot + kernel_parm->coef_const,
			(double)kernel_parm->poly_degree);
	}
	if (kernel_parm->kernel_type == 2) {
		/* radial basis function */
		dot = sprod_ss(a->words, b->words);
		return (CFLOAT)exp(-kernel_parm->rbf_gamma * (a->twonorm_sq - 2 * dot + b->twonorm_sq));
	}
	if (kernel_parm->kernel_type == 3) {
		/* sigmoid neural net */
		dot = sprod_ss(a->words, b->words);
		return (CFLOAT)tanh(kernel_parm->coef_lin * dot + kernel_parm->coef_const);
	}
	if (kernel_parm->kernel_type == 4) {
		/* custom kernel */
		return (CFLOAT)custom_kernel(kernel_parm, a, b);
	}

	/* unknown kernel type */
	sprintf(temstr,"Error: Unknown kernel function");
	printm(temstr);
	return -1;
}

/*
 * Inner product of two sparse vectors.
 *
 * Both arrays are walked in step like a merge: the cursor with the smaller
 * feature number advances, and when the feature numbers match the two
 * weights are multiplied into the running sum.  The walk stops at the
 * first entry whose wnum is 0 in either array, so both inputs must end
 * with such an entry; the merge only lines up features correctly when
 * wnum is ascending in both arrays.
 */
double sprod_ss(SVM_WORD *a,SVM_WORD*b)
{
	FVAL acc = 0;
	SVM_WORD *pa = a;
	SVM_WORD *pb = b;

	while (pa->wnum != 0 && pb->wnum != 0) {
		if (pa->wnum == pb->wnum) {
			acc += pa->weight * pb->weight;
			pa++;
			pb++;
		}
		else if (pa->wnum < pb->wnum) {
			pa++;
		}
		else {
			pb++;
		}
	}
	return (double)acc;
}



/*
 * Inner product of two sparse vectors with the feature numbers of `b`
 * shifted by `offset`.
 *
 * An entry of `a` pairs with an entry of `b` when a.wnum == b.wnum + offset.
 * A matching pair contributes a.weight * b.weight only if
 *     (a.wnum - 1) % 16 + 1 + offset
 * lies in 1..16; otherwise the pair is consumed without being counted.
 * NOTE(review): this looks like a guard against a shift crossing the edge
 * of a 16-wide row -- confirm against the caller.
 *
 * Both arrays must end with an entry whose wnum is 0.
 */
double sprod_ss1(SVM_WORD *a,SVM_WORD*b,int offset)
{
	FVAL acc = 0;
	SVM_WORD *pa = a;
	SVM_WORD *pb = b;

	while (pa->wnum && pb->wnum) {
		if (pa->wnum > pb->wnum + offset) {
			pb++;
			continue;
		}
		if (pa->wnum < pb->wnum + offset) {
			pa++;
			continue;
		}

		/* feature numbers line up once b is shifted */
		{
			int col = (pa->wnum - 1) % 16 + 1 + offset;
			if (col >= 1 && col <= 16)
				acc += pa->weight * pb->weight;
		}
		pa++;
		pb++;
	}
	return (double)acc;
}

/*
 * Inner product of two sparse vectors with the feature numbers of `b`
 * shifted by `offset`.
 *
 * An entry of `a` pairs with an entry of `b` when a.wnum == b.wnum + offset.
 * A matching pair contributes a.weight * b.weight only if a.wnum + offset
 * lies in 1..256; otherwise the pair is consumed without being counted.
 *
 * Both arrays must end with an entry whose wnum is 0.
 */
double sprod_ss2(SVM_WORD *a,SVM_WORD*b,int offset)
{
	FVAL acc = 0;
	SVM_WORD *pa = a;
	SVM_WORD *pb = b;

	while (pa->wnum && pb->wnum) {
		if (pa->wnum > pb->wnum + offset) {
			pb++;
			continue;
		}
		if (pa->wnum < pb->wnum + offset) {
			pa++;
			continue;
		}

		/* feature numbers line up once b is shifted */
		{
			int shifted = pa->wnum + offset;
			if (shifted >= 1 && shifted <= 256)
				acc += pa->weight * pb->weight;
		}
		pa++;
		pb++;
	}
	return (double)acc;
}

/*
 * Length of the model's weight vector in kernel space:
 *
 *     sqrt( sum_i sum_j alpha[i] * alpha[j] * kernel(supvec[i], supvec[j]) )
 *
 * with i and j both running over 1 .. sv_num-1.  The kernel is evaluated
 * with the `kernel_parm` argument, not with model->kernel_parm.
 */
double model_length_s(MODEL *model,KERNEL_PARM *kernel_parm)
{
	double total = 0;
	long row, col;

	for (row = 1; row < model->sv_num; row++) {
		double alpha_row = model->alpha[row];
		DOC *sv_row = model->supvec[row];

		for (col = 1; col < model->sv_num; col++) {
			total += alpha_row * model->alpha[col]
				* kernel(kernel_parm, sv_row, model->supvec[col]);
		}
	}
	return sqrt(total);
}

/*
 * Set vec[0] .. vec[n] to zero.  The upper bound is inclusive, so n+1
 * elements are written; a negative n writes nothing.
 */
void clear_vector_n(double *vec,long n)
{
	double *cur = vec;
	long remaining;

	for (remaining = n; remaining >= 0; remaining--) {
		*cur++ = 0;
	}
}

void add_vector_ns(double *vec_n,SVM_WORD *vec_s,double faktor)
{
	register SVM_WORD *ai;
	ai=vec_s;
	while (ai->wnum) {
		vec_n[ai->wnum]+=(faktor*ai->weight);
		ai++;
	}
}

double sprod_ns(double *vec_n,SVM_WORD *vec_s)
{
	register double sum=0;
	register SVM_WORD *ai;
	ai=vec_s;
	while (ai->wnum) {
		sum+=(vec_n[ai->wnum]*ai->weight);
		ai++;
	}
	return(sum);
}

/*
 * Build the explicit weight vector of a linear model and store it in
 * model->lin_weights.
 *
 * Allocates totwords+1 doubles (slots 0 .. totwords), zeroes them, then
 * adds alpha[i] * supvec[i] for i = 1 .. sv_num-1.  Whatever
 * model->lin_weights pointed to before is overwritten without being freed.
 */
void add_weight_vector_to_linear_model(MODEL *model)
{
	double *weights = (double *)my_malloc(sizeof(double)*(model->totwords+1));
	long sv;

	model->lin_weights = weights;
	clear_vector_n(weights, model->totwords);

	sv = 1;
	while (sv < model->sv_num) {
		add_vector_ns(weights, model->supvec[sv]->words, model->alpha[sv]);
		sv++;
	}
}

/*
 * Load a model from the text file `modelfile` into `model`.
 *
 * File layout, as parsed below: a "SVM-light Version <v>" line whose <v>
 * must equal VERSION, ten header lines (kernel type, kernel parameters,
 * highest feature index, number of training documents, number of support
 * vectors plus 1, threshold b), then one line per support vector of the
 * form "<alpha> <wnum>:<weight> <wnum>:<weight> ...".
 *
 * Parameters:
 *   modelfile  path of the model file
 *   model      receives the header values, alpha[1..sv_num-1] and
 *              supvec[1..sv_num-1].  The alpha and supvec arrays are only
 *              written here, never allocated, so the caller must provide
 *              them with room for sv_num entries.
 *   max_words  at most this many features are kept per support vector;
 *              any further features on a line are dropped
 *   ll         size of the line buffer; a longer line is read in pieces
 *              by fgets()
 *
 * Returns 0 on success and -1 on failure (file cannot be opened, version
 * mismatch, malformed header, truncated or malformed support-vector
 * section).  The temporary buffers and the file handle are released on
 * every path.  After a failure `model` may be partially filled.
 */
int read_model(char *modelfile,MODEL *model,long max_words,long ll)
{
	FILE *modelfl = NULL;
	char *line = NULL;
	SVM_WORD *words = NULL;
	char version_buffer[100];
	long i, j, wpos, pos, wnum;
	double weight;
	int consumed;
	int header_ok;
	int status = -1;

	if (com_pro.show_action)
	{
		sprintf(temstr,"Reading model..."); 
		printm(temstr); 
	}

	words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*(max_words+10));
	line = (char *)my_malloc(sizeof(char)*ll);

	if ((modelfl = fopen (modelfile, "r")) == NULL)
	{
		printe (modelfile);  
		goto cleanup;
	}

	/* Width-limited read; an empty buffer makes a failed read compare
	   as a mismatch instead of reading uninitialised memory. */
	version_buffer[0] = '\0';
	if (fscanf(modelfl,"SVM-light Version %99s\n",version_buffer) != 1
		|| strcmp(version_buffer,VERSION)) 
	{
		printe ("Version of model-file does not match version of svm_classify!"); 
		goto cleanup;
	}

	/* Every header line must yield exactly one value; stop at the first
	   one that does not, since sv_num and friends drive the loop below.
	   NOTE(review): the %s for the custom-kernel string is unbounded
	   because the size of kernel_parm.custom is not visible from here --
	   add a field width once it is known. */
	header_ok =
		fscanf(modelfl,"%ld # kernel type\n",&(model->kernel_parm.kernel_type)) == 1 &&
		fscanf(modelfl,"%ld # kernel parameter -d \n",&(model->kernel_parm.poly_degree)) == 1 &&
		fscanf(modelfl,"%lf # kernel parameter -g \n",&(model->kernel_parm.rbf_gamma)) == 1 &&
		fscanf(modelfl,"%lf # kernel parameter -s \n",&(model->kernel_parm.coef_lin)) == 1 &&
		fscanf(modelfl,"%lf # kernel parameter -r \n",&(model->kernel_parm.coef_const)) == 1 &&
		fscanf(modelfl,"%s # kernel parameter -u \n",&(model->kernel_parm.custom)) == 1 &&
		fscanf(modelfl,"%ld # highest feature index \n",&(model->totwords)) == 1 &&
		fscanf(modelfl,"%ld # number of training documents \n",&(model->totdoc)) == 1 &&
		fscanf(modelfl,"%ld # number of support vectors plus 1 \n",&(model->sv_num)) == 1 &&
		fscanf(modelfl,"%lf # threshold b \n",&(model->b)) == 1;
	if (!header_ok)
	{
		printe ("Model file header is incomplete or malformed!");
		goto cleanup;
	}

	for(i=1;i<model->sv_num;i++) 
	{
		if (fgets(line,(int)ll,modelfl) == NULL)
		{
			printe ("Unexpected end of model file while reading support vectors!");
			goto cleanup;
		}

		/* Leading alpha; %n reports how far sscanf got so the feature
		   scan resumes right after it. */
		consumed = 0;
		if (sscanf(line,"%lf%n",&model->alpha[i],&consumed) != 1)
		{
			printe ("Malformed support vector line in model file!");
			goto cleanup;
		}
		pos = consumed;

		/* Collect "wnum:weight" pairs.  Only a complete pair is accepted,
		   and advancing by the consumed count keeps the scan correct for
		   any amount of whitespace between pairs. */
		wpos = 0;
		while ((wpos<max_words)
			&& (sscanf(line+pos,"%ld:%lf%n",&wnum,&weight,&consumed) == 2)) 
		{
			pos += consumed;
			words[wpos].wnum=wnum;
			words[wpos].weight=weight; 
			wpos++;
		} 

		model->supvec[i] = (DOC *)my_malloc(sizeof(DOC));
		(model->supvec[i])->words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*(wpos+1));
		for(j=0;j<wpos;j++)
		{
			(model->supvec[i])->words[j]=words[j]; 
		}
		/* wnum == 0 terminates the sparse vector */
		((model->supvec[i])->words[wpos]).wnum=0;
		(model->supvec[i])->twonorm_sq = sprod_ss((model->supvec[i])->words,(model->supvec[i])->words);
		(model->supvec[i])->docnum = -1;
	}
	status = 0;

cleanup:
	if (modelfl != NULL)
	{
		fclose(modelfl);
	}
	free(line);
	free(words);

	if (status == 0 && com_pro.show_readfile)
	{
		sprintf(temstr, "OK. (%d support vectors read)",(int)(model->sv_num-1));
		printm(temstr);
	}
	return status;
}
/*
 * Read labelled examples from the text file `docfile`.
 *
 * Each line that does not start with '#' is handed to parse_document().
 * A line that parses is copied into docs[k] with its label in label[k],
 * k counting accepted documents from 0.  A line that fails to parse is
 * reported through printm() and skipped, because doc_label and wpos
 * cannot be trusted after a failed parse.
 * NOTE(review): parse_document() is assumed to return 0 on failure and to
 * return the word count including the terminating entry -- confirm
 * against its definition.
 *
 * Parameters:
 *   docfile        path of the example file
 *   docs, label    output arrays; written without any bounds check, so
 *                  the caller must size them for the whole file
 *   max_words_doc  maximum number of features per example
 *   ll             size of the line buffer
 *   totwords       out: largest value seen in the last real feature slot
 *                  (words[wpos-2]) of any document, i.e. the highest
 *                  feature number when features are sorted ascending
 *   totdoc         out: number of documents stored
 *
 * Returns 0 on success, -1 if the file cannot be opened (in which case
 * *totwords and *totdoc are left untouched).
 */
int read_documents(char *docfile,
					DOC  *docs,
					long *label,
					long max_words_doc,
					long ll,
					long *totwords,
					long *totdoc)
{
	char *line;
	DOC doc;
	long dnum=0,wpos=0,i,dpos=0,dneg=0,dunlab=0;
	long doc_label=0;
	FILE *docfl;

	/* Open first so the failure path has nothing to release. */
	if ((docfl = fopen (docfile, "r")) == NULL)
	{ 
		printe (docfile);  
		return -1;
	}

	line = (char *)my_malloc(sizeof(char)*ll);
	doc.words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*(max_words_doc+10));
	if (com_pro.show_readfile)
	{
		sprintf(temstr,"Reading examples into memory..."); 
		printm(temstr);
	}

	(*totwords)=0;
	while (fgets(line,(int)ll,docfl) != NULL) {
		if(line[0] == '#') continue;  /* line contains comments */
		if(!parse_document(line,&doc,&doc_label,&wpos,max_words_doc)) 
		{
			sprintf(temstr,"Parsing error in line %ld!",dnum);
			printm(temstr);
			continue;  /* do not store a document built from stale values */
		}
		label[dnum]=doc_label;
		if(doc_label > 0) dpos++;
		if (doc_label < 0) dneg++;
		if (doc_label == 0) dunlab++;
		/* words[wpos-1] is the terminator, words[wpos-2] the last feature */
		if((wpos>1) && ((doc.words[wpos-2]).wnum>(*totwords))) 
			(*totwords)=(doc.words[wpos-2]).wnum;
		docs[dnum].words = (SVM_WORD *)my_malloc(sizeof(SVM_WORD)*wpos);
		docs[dnum].docnum=dnum;
		for(i=0;i<wpos;i++) 
			docs[dnum].words[i]=doc.words[i];
		docs[dnum].twonorm_sq=doc.twonorm_sq;
		dnum++;  
		if((dnum % 100) == 0&&com_pro.show_readfile) 
		{
			sprintf(temstr,"read %ld..",dnum); 
			printm(temstr);
		}
	} 

	fclose(docfl);
	free(line);
	free(doc.words);
	if (com_pro.show_action)
	{
		sprintf(temstr, "OK. (%ld examples read)", dnum);
		printm(temstr);
		sprintf(temstr,"%ld positive, %ld negative, and %ld unlabeled examples.",dpos,dneg,dunlab); 
		printm(temstr);
	}
	(*totdoc)=dnum;
	return 0;
}
/*Parse one  line of data file */
int parse_document(char *line,DOC *doc,long *label,long*numwords,long max_words_doc)

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -