
📄 svm_learn_main.c

📁 SVM_light is a very popular Java interface to Support Vector Machines
💻 C
📖 Page 1 of 2
/***********************************************************************/
/*                                                                     */
/*   svm_learn_main.c                                                  */
/*                                                                     */
/*   Command line interface to the learning module of the              */
/*   Support Vector Machine.                                           */
/*                                                                     */
/*   Author: Thorsten Joachims                                         */
/*   Date: 02.07.02                                                    */
/*                                                                     */
/*   Copyright (c) 2000  Thorsten Joachims - All rights reserved       */
/*                                                                     */
/*   This software is available for non-commercial use only. It must   */
/*   not be modified and distributed without prior permission of the   */
/*   author. The author is not responsible for implications from the   */
/*   use of this software.                                             */
/*                                                                     */
/***********************************************************************/
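/* Typical invocation (print_help, declared below, prints the full
   option list):

      svm_learn [options] example_file model_file

   example_file holds the training examples; the learned classifier
   is written to model_file. */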


/* uncomment the extern "C" wrapper below to call svm_learn from C++ */
/* extern "C" { */ 
# include "svm_common.h"
# include "svm_learn.h"
/*}*/ 
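
/* With the braces above uncommented, a C++ compilation unit sees

     extern "C" {
     # include "svm_common.h"
     # include "svm_learn.h"
     }

   which keeps C linkage for the declarations. */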

char docfile[200];           /* file with training examples */
char modelfile[200];         /* file for resulting classifier */
char restartfile[200];       /* file with initial alphas */

void   read_input_parameters(int, char **, char *, char *, char *, long *, 
			     LEARN_PARM *, KERNEL_PARM *);
void   wait_any_key();
void   print_help();



int main (int argc, char* argv[])
{  
  DOC **docs;  /* training examples */
  long totwords,totdoc,i;
  double *target;
  double *alpha_in=NULL;
  KERNEL_CACHE *kernel_cache;
  LEARN_PARM learn_parm;
  KERNEL_PARM kernel_parm;
  MODEL *model=(MODEL *)my_malloc(sizeof(MODEL));

  read_input_parameters(argc,argv,docfile,modelfile,restartfile,&verbosity,
			&learn_parm,&kernel_parm);
  read_documents(docfile,&docs,&target,&totwords,&totdoc);
  if(restartfile[0]) alpha_in=read_alphas(restartfile,totdoc);
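
  /* docs/target now hold the totdoc training examples and their target
     values; alpha_in carries initial alphas when a restart file was given. */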

  /* Find an unused dump file name: maintraindump1.dat, maintraindump2.dat, ... */
  FILE *dump = NULL;
  int lengthcnt = 25;
  int namecnt = 2;
  char *traindump = (char *)my_malloc(sizeof(char)*lengthcnt);
  sprintf(traindump,"maintraindump%d.dat",1);

  while((dump = fopen(traindump,"r+")) != NULL) {
    fclose(dump);
    printf("traindump is already there: %s\n",traindump);
    /* the next name is at most one digit longer; grow the buffer if needed */
    if(strlen(traindump)+2 >= lengthcnt) {
      free(traindump);
      lengthcnt += 20;  /* was 'lengthcnt =+ 20', which assigned 20 instead of adding */
      traindump = (char *)my_malloc(sizeof(char)*lengthcnt);
    }
    sprintf(traindump,"maintraindump%d.dat",namecnt++);
  }
	
  printf("------------------------------ Writing traindump to file %s",traindump);
  if((dump = fopen(traindump,"w")) == NULL) {
    perror("could not open traindump file");  /* perror appends the system error text */
    exit(1);
  }
	
  printf("\n|||||||||||||||||||||||||||||||||| dumping ..\n");
  long int z = 0;
  long int y = 0;
  fprintf(dump,"totaldocuments: %ld \n",totdoc);
  /* one line per example: document metadata, target value, then the
     sparse feature vector as "wnum:weight" pairs */
  while(z < totdoc) {
    fprintf(dump,"(%ld) (QID: %ld) (CF: %.16g) (SID: %ld) ",
            docs[z]->docnum,docs[z]->queryid,docs[z]->costfactor,docs[z]->slackid);
    SVECTOR *v = docs[z]->fvec;
    if(v != NULL) {  /* test before dereferencing; the original read v->twonorm_sq first */
      fprintf(dump,"(NORM:%.32g) (UD:%s) (KID:%ld) (VL:%p) (F:%.32g) %.32g ",
              v->twonorm_sq,(v->userdefined == NULL ? "" : v->userdefined),
              v->kernel_id,(void *)v->next,v->factor,target[z]);
      if(v->words != NULL) {
        while((v->words[y]).wnum) {  /* a sparse vector ends at wnum == 0 */
          fprintf(dump,"%ld:%.32g ",(v->words[y]).wnum,(v->words[y]).weight);
          y++;
        }
      }
      else
        fprintf(dump,"NULL WORDS");
    }
    fprintf(dump,"\n");
    y=0;
    z++;
  }

	
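  /* Also record the kernel and learning parameters of this run, so the
     dump fully describes the training configuration. */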
  fprintf(dump,"---------------------------------------------------\n");
  fprintf(dump,"kernel_type: %ld\n",kernel_parm.kernel_type);
  fprintf(dump,"poly_degree: %ld\n",kernel_parm.poly_degree);
  fprintf(dump,"rbf_gamma: %.32g\n",kernel_parm.rbf_gamma);
  fprintf(dump,"coef_lin: %.32g\n",kernel_parm.coef_lin);
  fprintf(dump,"coef_const: %.32g\n",kernel_parm.coef_const);
  fprintf(dump,"custom: %s\n",kernel_parm.custom);

  fprintf(dump,"type: %ld\n",learn_parm.type);
  fprintf(dump,"svm_c: %.32g\n",learn_parm.svm_c);
  fprintf(dump,"eps: %.32g\n",learn_parm.eps);
  fprintf(dump,"svm_costratio: %.32g\n",learn_parm.svm_costratio);
  fprintf(dump,"transduction_posratio: %.32g\n",learn_parm.transduction_posratio);
  fprintf(dump,"biased_hyperplane: %ld\n",learn_parm.biased_hyperplane);
  fprintf(dump,"svm_maxqpsize: %ld\n",learn_parm.svm_maxqpsize);
  fprintf(dump,"svm_newvarsinqp: %ld\n",learn_parm.svm_newvarsinqp);
  fprintf(dump,"epsilon_crit: %.32g\n",learn_parm.epsilon_crit);
  fprintf(dump,"epsilon_shrink: %.32g\n",learn_parm.epsilon_shrink);
  fprintf(dump,"svm_iter_to_shrink: %ld\n",learn_parm.svm_iter_to_shrink);
  fprintf(dump,"remove_inconsistent: %ld\n",learn_parm.remove_inconsistent);
  fprintf(dump,"skip_final_opt_check: %ld\n",learn_parm.skip_final_opt_check);
  fprintf(dump,"compute_loo: %ld\n",learn_parm.compute_loo);
  fprintf(dump,"rho: %.32g\n",learn_parm.rho);
  fprintf(dump,"xa_depth: %ld\n",learn_parm.xa_depth);
  fprintf(dump,"predfile: %s\n",learn_parm.predfile);
  fprintf(dump,"alphafile: %s\n",learn_parm.alphafile);
  fprintf(dump,"epsilon_const: %.32g\n",learn_parm.epsilon_const);
  fprintf(dump,"epsilon_a: %.32g\n",learn_parm.epsilon_a);
  fprintf(dump,"opt_precision: %.32g\n",learn_parm.opt_precision);
  fprintf(dump,"svm_c_steps: %ld\n",learn_parm.svm_c_steps);
  fprintf(dump,"svm_c_factor: %.32g\n",learn_parm.svm_c_factor);
  fprintf(dump,"svm_costratio_unlab: %.32g\n",learn_parm.svm_costratio_unlab);
  fprintf(dump,"svm_unlabbound: %.32g\n",learn_parm.svm_unlabbound);

	
  if(kernel_parm.kernel_type == LINEAR) { /* don't need the cache */
    kernel_cache=NULL;
  }
  else {
    /* Always get a new kernel cache. It is not possible to use the
       same cache for two different training runs */
    kernel_cache=kernel_cache_init(totdoc,learn_parm.kernel_cache_size);
  }

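  /* Run the training mode selected on the command line (-z option). */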
  if(learn_parm.type == CLASSIFICATION) {
    svm_learn_classification(docs,target,totdoc,totwords,&learn_parm,
			     &kernel_parm,kernel_cache,model,alpha_in);
  }
  else if(learn_parm.type == REGRESSION) {
    svm_learn_regression(docs,target,totdoc,totwords,&learn_parm,
			 &kernel_parm,&kernel_cache,model);
  }
  else if(learn_parm.type == RANKING) {
    svm_learn_ranking(docs,target,totdoc,totwords,&learn_parm,
		      &kernel_parm,&kernel_cache,model);
  }
  else if(learn_parm.type == OPTIMIZATION) {
    svm_learn_optimization(docs,target,totdoc,totwords,&learn_parm,
			   &kernel_parm,kernel_cache,model,alpha_in);
  }
	
  fprintf(dump,"totwords: %ld\n",learn_parm.totwords);

  printf("|||||||||||||||||||||||||||||||||| z: %ld, totdoc: %ld\n",z,totdoc);

  fclose(dump);

  if(kernel_cache) {
    /* Free the memory used for the cache. */
    kernel_cache_cleanup(kernel_cache);
  }

  /* Warning: The model contains references to the original data 'docs'.
     If you want to free the original data, and only keep the model, you 
     have to make a deep copy of 'model'. */
  /* deep_copy_of_model=copy_model(model); */
  write_model(modelfile,model);

  free(alpha_in);
  free_model(model,0);
  for(i=0;i<totdoc;i++) 
    free_example(docs[i],1);
  free(docs);
  free(target);

  return(0);
}

/*---------------------------------------------------------------------------*/

void read_input_parameters(int argc,char *argv[],char *docfile,char *modelfile,
			   char *restartfile,long *verbosity,
			   LEARN_PARM *learn_parm,KERNEL_PARM *kernel_parm)
{
  long i;
  char type[100];
  
  /* set default */
  strcpy (modelfile, "svm_model");
  strcpy (learn_parm->predfile, "trans_predictions");
  strcpy (learn_parm->alphafile, "");
  strcpy (restartfile, "");
  (*verbosity)=1;
  learn_parm->biased_hyperplane=1;
  learn_parm->sharedslack=0;
  learn_parm->remove_inconsistent=0;
  learn_parm->skip_final_opt_check=0;
  learn_parm->svm_maxqpsize=10;
  learn_parm->svm_newvarsinqp=0;
  learn_parm->svm_iter_to_shrink=-9999;
  learn_parm->maxiter=100000;
  learn_parm->kernel_cache_size=40;  /* in MB */
  learn_parm->svm_c=0.0;
  learn_parm->eps=0.1;
  learn_parm->transduction_posratio=-1.0;
  learn_parm->svm_costratio=1.0;
  learn_parm->svm_costratio_unlab=1.0;
  learn_parm->svm_unlabbound=1E-5;
  learn_parm->epsilon_crit=0.001;
  learn_parm->epsilon_a=1E-15;
  learn_parm->compute_loo=0;
  learn_parm->rho=1.0;
  learn_parm->xa_depth=0;
  kernel_parm->kernel_type=0;
  kernel_parm->poly_degree=3;
  kernel_parm->rbf_gamma=1.0;
  kernel_parm->coef_lin=1;
  kernel_parm->coef_const=1;
  strcpy(kernel_parm->custom,"empty");
  strcpy(type,"c");
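  /* default "c" = classification; -z selects the training mode
     (cf. the dispatch in main) */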

  for(i=1;(i<argc) && ((argv[i])[0] == '-');i++) {
    switch ((argv[i])[1]) 
      { 
      case '?': print_help(); exit(0);
      case 'z': i++; strcpy(type,argv[i]); break;
      case 'v': i++; (*verbosity)=atol(argv[i]); break;
      case 'b': i++; learn_parm->biased_hyperplane=atol(argv[i]); break;
      case 'i': i++; learn_parm->remove_inconsistent=atol(argv[i]); break;
      case 'f': i++; learn_parm->skip_final_opt_check=!atol(argv[i]); break;
      case 'q': i++; learn_parm->svm_maxqpsize=atol(argv[i]); break;
      case 'n': i++; learn_parm->svm_newvarsinqp=atol(argv[i]); break;
