⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 train_svm.c

📁 linux下的源码分类器SVM
💻 C
字号:
/* Copyright 2001, 2002, 2003, 2004 Yann Guermeur and Andre Elisseeff               *//* This program is free software; you can redistribute it and/or modify       *//* it under the terms of the GNU General Public License as published by       *//* the Free Software Foundation; either version 2 of the License, or          *//* (at your option) any later version.                                        *//* This program is distributed in the hope that it will be useful,            *//* but WITHOUT ANY WARRANTY; without even the implied warranty of             *//* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the              *//* GNU General Public License for more details.                               *//* You should have received a copy of the GNU General Public License          *//* along with this program; if not, write to the Free Software                *//* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA  *//*----------------------------------------------------------------------------*//*  Name           : train_SVM.c                                              *//*  Version        : 1.1                                                      *//*  Creation       : 04/27/00                                                 *//*  Last update    : 06/30/08                                                 *//*  Subject        : Training algorithm of a M-SVM                            *//*  Algo.          : Frank-Wolfe algorithm + decomposition method             *//*  Author         : Yann Guermeur Yann.Guermeur@loria.fr                     *//*----------------------------------------------------------------------------*/#include <stdio.h>#include <stdlib.h> #include <string.h>#include <math.h>#include <ctype.h>#include "algebre.h"#define minimum(a,b) ((a)<=(b)?(a):(b))#define maximum(a,b) ((a)>=(b)?(a):(b))#define true 1#define false 0#define taille 81#define pas 100#define small 1e-3#define negligeable 1e-8FILE *fs, *fc;unsigned short xi[3];long jump=false, nature_kernel=0,i, j, k, l, m, dim_input, nb_data, iter, nb_iter=10000000, *y,y_i, y_j, Q=0, chunk_size=0, *table_chunk, *in_chunk, ind_pattern,nrand48(), random_num, rank, nb_points_chunk, active, nb_active_var,*table_active, nb_active_var_in_chunk, half_chunk_size=0;char fichier_data[taille], fichier_alpha[taille], letter, commande[taille],conf[taille], fichier_lp_sol[taille]="lp.res",*ligne, *frag, fichier_fichcom[taille], fichier_alpha_0[taille],*tampon[6] = {"00000","0000","000","00","0",""},fichier_lp[taille]="lp.lp";double **X, **alpha, C=0.0, **gradient, **delta, value,obj_lp_lu=0.0, obj_lp_comp=0.0, **lp_sol, theta_opt=0.0,partiel, **K, **H_delta, gradient_alpha,gradient_Gamma, *rhs, gradient_diff;/* Functions included in this program */long indice_tampon(long parametre);void caract_db();void alloc_memory();void read_data();void read_alpha();void compute_table_chunk();void compute_K();void compute_gradient();void write_lp_file();void read_lp_sol();void check_opt_sol();void compute_delta();void compute_H_delta();void compute_theta_opt();void compute_new_alpha();void write_vector(double **vector, char *fichier);void Pause(char *message);main(int argc, char *argv[]){system("clear");srand48(getpid());strcpy(fichier_fichcom, argv[1]);caract_db();read_data();read_alpha();for(iter=1; iter<=nb_iter; iter++)  {  if((iter%pas) == 0)    printf("\n\n*** Iteration: %6d\n", iter);  jump = false;  compute_table_chunk();  compute_K();  compute_gradient();  if(jump == false)    {    write_lp_file();    system("./lp_solve < lp.lp > lp.res");    read_lp_sol();    }  if(jump == false)    check_opt_sol();  if(jump == false)    {    compute_delta();    compute_theta_opt();    compute_new_alpha();    if((iter%pas) == 0)      write_vector(alpha, fichier_alpha);    }  } }long indice_tampon(long parametre){long valeur_long;valeur_long = (long) floor(log10((double) parametre));return valeur_long;}void Pause(char *message){char c;static int n = 0;if(message!=NULL)  printf("%s\n",message);printf("\a\n=> New Line to proceed...\n");n = read(0, &c, 1);while(c != '\n')  n = read(0, &c, 1);return;}void caract_db(){if((fs=fopen(fichier_fichcom, "r"))==NULL)  {  printf("\nFile of parameters %s: cannot be open...\n", fichier_fichcom);  exit(0);  }fscanf(fs, "%d", &Q);fscanf(fs, "%d", &nature_kernel);fscanf(fs, "%lf", &C);fscanf(fs, "%d", &chunk_size);half_chunk_size = chunk_size / 2;fscanf(fs, "%s", fichier_data);printf("\nThe data file is: %s\n", fichier_data);fscanf(fs, "%s", fichier_alpha_0);fscanf(fs, "%s", fichier_alpha);fclose(fs);sprintf(commande, "cp %s Save_alpha/.", fichier_alpha);}void alloc_memory(){X = matrix(nb_data, dim_input);y = (long *) calloc(nb_data+1, sizeof(long));alpha = matrix(nb_data, Q);in_chunk = (long *) calloc(nb_data+1, sizeof(long));gradient = matrix(chunk_size, Q);lp_sol = matrix(chunk_size, Q);delta = matrix(chunk_size, Q);table_chunk = (long *) calloc(chunk_size+1, sizeof(long));table_active = (long *) calloc(nb_data+1, sizeof(long));K = matrix(chunk_size, nb_data);H_delta = matrix(chunk_size, Q);rhs = (double *) calloc(Q+1, sizeof(double));ligne = (char *) calloc(taille+1, sizeof(char));frag = (char *) calloc(taille+1, sizeof(char));}void read_data(){long min_y = 1000;double value_y;if((fs=fopen(fichier_data, "r"))==NULL)  {  printf("\nData file %s: cannot be open...\n", fichier_data);  exit(0);  }fscanf(fs, "%d", &nb_data);fscanf(fs, "%d", &dim_input);alloc_memory();for(i=1; i<=nb_data; i++)  {  for(j=1; j<=dim_input; j++)    fscanf(fs, "%lf", &X[i][j]);  fscanf(fs, "%lf", &value_y);  y[i] = (long) value_y;  if(y[i] < min_y)     min_y = y[i];  }fclose(fs);printf("\nThe file of the data has been read...\n");sleep(1);if((min_y != 0) && (min_y != 1))  {  printf("\nWrong numbering of the categories, minimal value is: %d\n", min_y);  exit(0);  }if(min_y == 0)  {  printf("\nStandardizing the coding of the categories...\n");  for(i=1; i<=nb_data; i++)    y[i]++;  }}void read_alpha(){if((fs=fopen(fichier_alpha_0, "r"))==NULL)  {  printf("\nFile of initial dual variable values %s: cannot be open...\n",   fichier_alpha_0);  exit(0);  }for(i=1; i<=nb_data; i++)  for(k=1; k<=Q; k++)    fscanf(fs, "%lf", &alpha[i][k]);fclose(fs);printf("\nThe file of the initial values of the dual variables has been read...\n");sleep(1);}void write_lp_file(){if((fc=fopen(fichier_lp, "w"))==NULL)  {  printf("\nFile lp %s: cannot be open...\n", fichier_lp);  exit(0);  }fprintf(fc, "min:");for(i=1; i<=chunk_size; i++)  {  ind_pattern = table_chunk[i];  y_i = y[ind_pattern];  for(k=1; k<=Q; k++)    if(k != y_i)      {      if(gradient[i][k] > 0.0)        fprintf(fc, " +%lf A0%s%d", gradient[i][k],        tampon[indice_tampon(Q*(i-1)+k)], (Q*(i-1)+k));      if(gradient[i][k] < 0.0)         fprintf(fc, " %lf A0%s%d", gradient[i][k],        tampon[indice_tampon(Q*(i-1)+k)], (Q*(i-1)+k));      }  }fprintf(fc, ";\n");for(k=1; k<Q; k++)  {  rhs[k] = 0.0;  for(i=1; i<=nb_data; i++)    if(in_chunk[i] == 0)      {      y_i = y[i];      for(l=1; l<=Q; l++)        {        if((y_i == k) && (l != k))          rhs[k] += alpha[i][l];        if((y_i != k) && (l == k))          rhs[k] -= alpha[i][l];	}      }  }for(k=1; k<Q; k++)  {  fprintf(fc, "CONST%1d: ", k);  for(i=1; i<=chunk_size; i++)    {    ind_pattern = table_chunk[i];    y_i = y[ind_pattern];    if(y_i == k)      {      for(l=1; l<=Q; l++)        {        if(l != k)          {          fprintf(fc, "-A0%s%d ",          tampon[indice_tampon(Q*(i-1)+l)],(Q*(i-1)+l));          }        }      }    else      fprintf(fc, "+A0%s%d ",      tampon[indice_tampon(Q*(i-1)+k)], (Q*(i-1)+k));    }  fprintf(fc, "= %lf;\n", rhs[k]);  }for(i=1; i<=chunk_size; i++)  {  ind_pattern = table_chunk[i];  y_i = y[ind_pattern];  for(k=1; k<=Q; k++)    if(k != y_i)      fprintf(fc, "A0%s%d < %lf;\n",      tampon[indice_tampon(Q*(i-1)+k)], (Q*(i-1)+k), C);  }fclose(fc);}void compute_table_chunk(){for(i=1; i<=nb_data; i++)  {  in_chunk[i] = 0;  table_active[i] = 0;  }nb_active_var = 0;for(i=1; i<=nb_data; i++)  {  active = 0;  k = 1;  while((active == 0) && (k <= Q))    {    if((alpha[i][k] > 0.0) && (alpha[i][k] < C))      {      active = 1;      nb_active_var++;      table_active[nb_active_var] = i;      }    k++;    }  }if((iter%pas) == 0)  printf("\nNumber of margin Sup. Vec.: %d", nb_active_var);nb_points_chunk = 0;nb_active_var_in_chunk = minimum(half_chunk_size, nb_active_var);  if(nb_active_var_in_chunk == nb_active_var)  for(nb_points_chunk=1; nb_points_chunk <= nb_active_var; nb_points_chunk++)    {    table_chunk[nb_points_chunk] = table_active[nb_points_chunk];    in_chunk[table_active[nb_points_chunk]] = 1;    }else  while(nb_points_chunk < nb_active_var_in_chunk)      {    random_num = nrand48(xi);    rank = (random_num % nb_active_var)+1;    if(in_chunk[table_active[rank]] == 0)      {      in_chunk[table_active[rank]] = 1;      nb_points_chunk++;      table_chunk[nb_points_chunk] = table_active[rank];      }    }nb_points_chunk = nb_active_var_in_chunk;while(nb_points_chunk < chunk_size)  {  random_num = nrand48(xi);  rank = (random_num % nb_data)+1;  if(in_chunk[rank] == 0)    {    in_chunk[rank] = 1;    nb_points_chunk++;    table_chunk[nb_points_chunk] = rank;    }  }}void compute_K(){for(i=1; i<=chunk_size; i++)  {  ind_pattern = table_chunk[i];  for(j=1; j<=nb_data; j++)    K[i][j] = ker(nature_kernel, X[ind_pattern], X[j], dim_input);  }}void compute_gradient(){double norm= 0.0;for(i=1; i<=chunk_size; i++)  {  y_i = y[table_chunk[i]];  for(k=1; k<=Q; k++)    {    if(k != y_i)      {      gradient[i][k] = -1.0;      for(j=1; j<=nb_data; j++)        {        y_j = y[j];	partiel = 0.0;        if(y_j == y_i)          for(l=1; l<=Q; l++)            partiel += alpha[j][l];        if(y_j == k)          for(l=1; l<=Q; l++)            partiel -= alpha[j][l];        partiel += alpha[j][k] - alpha[j][y_i];	gradient[i][k] += partiel * K[i][j];        }      }    }  }for(i=1; i<=chunk_size; i++)  for(k=1; k<=Q; k++)    norm += gradient[i][k] * gradient[i][k];norm = sqrt(norm);if(norm < negligeable)  {    printf("\nNorm of the gradient vector : %e\n", norm);  sleep(2);  if(chunk_size < nb_data)    jump = true;  else    {    printf("\n\nAn optimal solution has been found !\n\n");    write_vector(alpha, fichier_alpha);    exit(0);    }  printf("\nGradient vector\n");  display_mat(gradient, chunk_size, Q, 10, 6);  printf("\nProceed after the gradient of the chunk has been displayed...\n");  scanf("%s", conf);  }}void compute_H_delta(){for(i=1; i<=chunk_size; i++)  {  y_i = y[table_chunk[i]];  for(k=1; k<=Q; k++)    {    H_delta[i][k] = 0.0;     if(k != y_i)      {      for(j=1; j<=chunk_size; j++)        {        y_j = y[table_chunk[j]];        partiel = 0.0;        if(y_j == y_i)          for(l=1; l<=Q; l++)            partiel += delta[j][l];	if(y_j == k)	  for(l=1; l<=Q; l++)            partiel -= delta[j][l];        partiel += delta[j][k] - delta[j][y_i];        H_delta[i][k] += partiel * K[i][table_chunk[j]];	}      }    }  }}void read_lp_sol(){if((fs=fopen(fichier_lp_sol, "r"))==NULL)  {  printf("\nThe file %s of the LP solution cannot be open...\n",  fichier_lp_sol);  exit(0);  }fgets(ligne, taille, fs);if(strncmp("This problem is infeasible", ligne, 26) == 0)  {  printf("\nProblem with the LP...\n");  jump=true;  }else  {  frag = strtok(ligne, ":");  frag = strtok(NULL, ":");  obj_lp_lu = atof(frag);  for(i=1; i<=chunk_size; i++)    {    ind_pattern = table_chunk[i];    y_i = y[ind_pattern];    lp_sol[i][y_i]=0.0;    }  for(i=1; i<=chunk_size; i++)    for(k=1; k<Q; k++)      {      fscanf(fs, "%c", &letter);      fscanf(fs, "%d", &rank);      fscanf(fs, "%lf", &value);      j = (rank-1)/Q+1;      l = (rank-1)%Q+1;      lp_sol[j][l] = value;      fscanf(fs, "%c", &letter);      }  }fclose(fs);}void check_opt_sol(){gradient_alpha = 0.0; gradient_Gamma = 0.0;gradient_diff = 0.0;/*display_mat(gradient, chunk_size, Q, 11, 8);display_mat(lp_sol, chunk_size, Q, 11, 8);Pause("");*/for(i=1; i<=chunk_size; i++)  {  ind_pattern = table_chunk[i];  y_i = y[ind_pattern];  for(k=1; k<=Q; k++)    if(k != y_i)      {      gradient_alpha += gradient[i][k] * alpha[ind_pattern][k];      gradient_Gamma += gradient[i][k] * lp_sol[i][k];/*      printf("\ngradient^T alpha vs gradient^T gamma: %lf %lf", 	     gradient_alpha, gradient_Gamma);      Pause("");*/      }  }gradient_diff = gradient_alpha - gradient_Gamma;if((iter%pas) == 0)  printf("\ngradient^T (alpha - gamma): %lf", gradient_diff);if(gradient_diff <= 0.0)  {  if(gradient_diff < 0.0)    {    printf("\nInappropriate subset selection...\n");    printf("\ngradient^T (alpha - gamma): %lf", gradient_diff);    }  jump = true;  }/*else  {  printf("\nAppropriate subset selection...\n");  Pause("");  }*/if(fabs((gradient_Gamma - obj_lp_lu) / obj_lp_lu) > small)  {  printf("\nInaccuracy in the LP problem specification, "  "gradient^T gamma = %lf versus %lf (read), \n\n",   gradient_Gamma, obj_lp_lu);  }}void compute_new_alpha(){for(i=1; i<=chunk_size; i++)  {  ind_pattern = table_chunk[i];  for(k=1; k<=Q; k++)    {    alpha[ind_pattern][k] *= (1.0-theta_opt);    alpha[ind_pattern][k] += theta_opt * lp_sol[i][k];    }  }}void compute_delta(){for(i=1; i<=chunk_size; i++)  {  ind_pattern = table_chunk[i];  for(k=1; k<=Q; k++)    delta[i][k] = alpha[ind_pattern][k] - lp_sol[i][k];  }}void compute_theta_opt(){double denominateur = 0.0;compute_H_delta();for(i=1; i<=chunk_size; i++)  for(k=1; k<=Q; k++)    denominateur += delta[i][k] * H_delta[i][k];if(denominateur <=0)  {  printf("\nWrong estimate of theta...\n");  exit(0);  }theta_opt = gradient_diff / denominateur;if(theta_opt > 1.0)  theta_opt = 1.0;if((iter%pas) == 0)  printf("\n    Optimal value of theta: %10.8f\n", theta_opt);}void write_vector(double **vector, char *fichier){if((fc=fopen(fichier, "w"))==NULL)  {  printf("\nThe file %s cannot be open...\n", fichier);  exit(0);  }for(i=1; i<=nb_data; i++)  for(k=1; k<=Q; k++)    fprintf(fc, "%15.10f %c", vector[i][k], (k==Q) ? '\n': ' ');fclose(fc);system(commande);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -