
📄 psmo_solver.cpp

📁 An implementation of the support vector classification algorithm on the Linux operating system
💻 CPP
{
  int j;
  NEXT_RAND = 1;
  for (int i=0; i<n; ++i)
    {
      do
        {
          j = next_rand_pos() % l;
        } while (work_status[j] != WORK_N);
      work_status[j] = WORK_B;
    }
  int k=0; j=0;
  for(int i=0; i<l; ++i)
    {
      if(work_status[i] == WORK_B)
        {
          work_set[j] = i; ++j;
          work_count[i] = 0;
        }
      else
        {
          not_work_set[k] = i; ++k;
          work_count[i] = -1;
        }
    }
}

void Solver_Parallel_SMO::Solve(int l, const QMatrix& Q, const double *b_,
                                const schar *y_, double *alpha_, double Cp,
                                double Cn, double eps, SolutionInfo* si,
                                int shrinking)
{
  double total_time = MPI_Wtime();
  double problem_setup_time = 0;
  double inner_solver_time = 0;
  double gradient_updating_time = 0;
  double time = 0;

  // Initialization
  this->l = l;
  this->active_size = l;
  this->Q = &Q;
  this->si = si;
  QD = Q.get_QD();
  clone(b,b_,l);
  clone(y,y_,l);
  clone(alpha,alpha_,l);
  this->Cp = Cp;
  this->Cn = Cn;
  this->eps = eps;
  this->lmn = l - n;
  this->G_n = new double[lmn];
  int *work_set = new int[n];
  int *not_work_set = new int[l];
  old_idx = new int[n];
  memset(old_idx, -1, sizeof(int)*n);
  double *delta_alpha = new double[n];

  // Setup alpha, work and parallel cache status
  {
    alpha_status = new char[l];
    work_status = new char[l];
    work_count = new int[l];
    p_cache_status = new char[size*l]; // one cache-status row of length l per processor
    idx_cached = new int[n];
    idx_not_cached = new int[n];
    nz = new int[n];
    for(int i=0; i<l; ++i)
      {
        update_alpha_status(i);
        work_status[i] = WORK_N;
        work_count[i] = -1;
        for(int k=0; k<size; ++k)
          p_cache_status[k*l+i] = NOT_CACHED;
      }
  }

  // Setup local index ranges
  {
    this->l_low = new int[size];
    this->l_up = new int[size];
    this->n_low = new int[size];
    this->n_up = new int[size];
    this->lmn_low = new int[size];
    this->lmn_up = new int[size];
    setup_range(l_low, l_up, l);
    setup_range(n_low, n_up, n);
    setup_range(lmn_low, lmn_up, lmn);
    this->l_low_loc = l_low[rank];
    this->l_up_loc = l_up[rank];
    this->n_low_loc = n_low[rank];
    this->n_up_loc = n_up[rank];
    this->lmn_up_loc = lmn_up[rank];
    this->lmn_low_loc = lmn_low[rank];
//     this->local_l = l_up_loc - l_low_loc;
//     this->local_n = n_up_loc - n_low_loc;
//     this->local_lmn = lmn_up_loc - lmn_low_loc;
  }

  // Setup gradient
  {
    info("Initializing gradient..."); info_flush();
    G = new double[l];
    double *G_send = new double[l];
    double *G_recv = new double[l];
    for(int i=0; i<l; ++i)
      {
        G[i] = b[i];
        G_send[i] = 0;
      }
    // Determine even distribution of work w.r.t.
    // variables at lower bound
    int k=0; int count_not_lower=0;
    int *idx_not_lower = new int[l];
    for(int i=0; i<l; ++i)
      {
        if(!is_lower_bound(i))
          {
            if(rank == k)
              {
                // We have to compute it
                idx_not_lower[count_not_lower]=i;
                ++count_not_lower;
              }
            k = k == size-1 ? 0 : k+1;
          }
      }
    // Compute local portion of gradient
    for(int i=0; i<count_not_lower; ++i)
      {
        const Qfloat *Q_i = Q.get_Q(idx_not_lower[i],l);
        double alpha_i = alpha[idx_not_lower[i]];
        for(int j=0; j<l; ++j)
          G_send[j] += alpha_i * Q_i[j];
      }
    delete[] idx_not_lower;
    // Get contributions from other processors
    for(int k=0; k<size; ++k)
      {
        if(rank == k)
          {
            ierr = MPI_Bcast(G_send, l, MPIfloat, k, comm);
            CheckError(ierr);
            for(int i=0; i<l; ++i)
              G[i] += G_send[i];
          }
        else
          {
            ierr = MPI_Bcast(G_recv, l, MPIfloat, k, comm);
            CheckError(ierr);
            for(int i=0; i<l; ++i)
              G[i] += G_recv[i];
          }
      }
    delete[] G_recv;
    delete[] G_send;
    info("done.\n"); info_flush();
    ierr = MPI_Barrier(comm); CheckError(ierr);
  }

  // Allocate space for local subproblem
  Q_bb = new Qfloat[n*n];
  QD_b = new Qfloat[n];
  alpha_b = new double[n];
  c = new double[n];
  a = new schar[n];

//   time = clock();
//   double tmp = 0;
//   for(int i=0; i<100; ++i)
//     {
//       for(int j=0; j<l; ++j)
//         {
//           tmp = Q.get_non_cached(i,j);
//         }
//     }
//   time = clock() - time;
//   printf("Computation time for 100 kernel rows = %.2lf\n", (double)time/CLOCKS_PER_SEC);

  if(rank == 0)
    {
      info("  it  | setup time | solver it | solver time | gradient time ");
      info("| kkt violation\n");
    }
  iter=0;

  // Optimization loop
  while(1)
    {
      // Only one processor does the working set selection.
      int status = 0;
      if(rank == 0)
        {
          if(iter > 0)
            {
              time = MPI_Wtime();
              // info("Starting working set selection\n"); info_flush();
              status = select_working_set(work_set, not_work_set);
              // info("select ws time = %.2f\n", MPI_Wtime() - time);
            }
          else
            {
              // info("Starting init ws\n"); info_flush();
              init_working_set(work_set, not_work_set);
            }
        }
      // Send status to other processors.
      ierr = MPI_Bcast(&status, 1, MPI_INT, 0, comm);
      // Now check for optimality
      if(status != 0)
        break;
      // Send new eps, as select_working_set might have
      // changed it.
      ierr = MPI_Bcast(&eps, 1, MPI_DOUBLE, 0, comm);
      // Send new working set size and working set to other processors.
      ierr = MPI_Bcast(&n, 1, MPI_INT, 0, comm);
      ierr = MPI_Bcast(work_set, n, MPI_INT, 0, comm);
      lmn = l - n;
      ierr = MPI_Bcast(not_work_set, lmn, MPI_INT, 0, comm);
      // Recompute ranges, as n and lmn might have changed
      setup_range(n_low, n_up, n);
      this->n_low_loc = n_low[rank];
      this->n_up_loc = n_up[rank];
      setup_range(lmn_low, lmn_up, lmn);
      this->lmn_low_loc = lmn_low[rank];
      this->lmn_up_loc = lmn_up[rank];
//       this->local_n = n_up_loc - n_low_loc;
//       this->local_lmn = lmn_up_loc - lmn_low_loc;
      ++iter;

      // Setup subproblem
      time = MPI_Wtime();
      for(int i=0; i<n; ++i)
        {
          c[i] = G[work_set[i]];
          alpha_b[i] = alpha[work_set[i]];
          a[i] = y[work_set[i]];
        }
      //      info("Setting up Q_bb..."); info_flush();
      for(int i=n_low_loc; i<n_up_loc; ++i)
        {
//        const Qfloat *Q_i = Q.get_Q_subset(work_set[i],work_set,n);
          if(Q.is_cached(work_set[i]))
            {
              const Qfloat *Q_i = Q.get_Q_subset(work_set[i],work_set,n);
              for(int j=0; j<=i; ++j)
                {
                  Q_bb[i*n+j] = Q_i[work_set[j]];
                }
            }
          else if(old_idx[i] == -1)
            {
              for(int j=0; j<=i; ++j)
                {
//                Q_bb[i*n+j] = Q_i[work_set[j]];
                  Q_bb[i*n+j] = Q.get_non_cached(work_set[i],work_set[j]);
                }
            }
          else
            {
              for(int j=0; j<i; ++j)
                {
                  if(old_idx[j] == -1)
                    Q_bb[i*n+j] = Q.get_non_cached(work_set[i],work_set[j]);
                  else
                    Q_bb[i*n+j] = Q_bb[old_idx[j]*n+old_idx[i]];
                }
              Q_bb[i*n+i] = Q.get_non_cached(work_set[i],work_set[i]);
            }
        }
      // Synchronize Q_bb
      ierr = MPI_Barrier(comm); CheckError(ierr);
      int num_elements = 0;
      for(int k=0; k<size; ++k)
        {
          ierr = MPI_Bcast(&Q_bb[num_elements], (n_up[k]-n_low[k])*n,
                           MPI_FLOAT, k, comm);
          CheckError(ierr);
          num_elements += (n_up[k]-n_low[k])*n;
        }
      // Complete symmetric Q
      for(int i=0; i<n; ++i)
        {
          for(int j=0; j<i; ++j)
            Q_bb[j*n+i] = Q_bb[i*n+j];
        }
      for(int i=0; i<n; ++i)
        {
          for(int j=0; j<n; ++j)
            {
              if(alpha[work_set[j]] > TOL_ZERO)
                c[i] -= Q_bb[i*n+j]*alpha[work_set[j]];
            }
          QD_b[i] = Q_bb[i*n+i];
        }
      //info("done.\n"); info_flush();
      time = MPI_Wtime() - time;
      problem_setup_time += time;
      if(rank == 0)
        {
          info("%5d | %10.2f |",iter,time); info_flush();
          time = MPI_Wtime();
          // Call SMO inner solver
          solve_inner();
          time = MPI_Wtime() - time;
          info(" %11.2f |", time); info_flush();
          inner_solver_time += time;
        }
      // Send alpha_b to other processors
      ierr = MPI_Bcast(alpha_b, n, MPI_DOUBLE, 0, comm);
      CheckError(ierr);

      // Update gradient.
      time = MPI_Wtime();
      for(int i=0; i<n; ++i)
        {
          delta_alpha[i] = alpha_b[i] - alpha[work_set[i]];
          if(fabs(delta_alpha[i]) > TOL_ZERO)
            nz[i] = 1;
          else
            nz[i] = 0;
        }
      //      info("Updating G_b..."); info_flush();
      // Compute G_b
      if(rank == 0)
        {
          // Only first processor does updating, since
          // it has the whole Q_bb
          for(int i=0; i<n; ++i)
            for(int j=0; j<n; ++j) // G_b
              {
                if(nz[j])
                  G[work_set[i]] += Q_bb[n*i+j]*delta_alpha[j];
              }
        }
      //      info("done.\n"); info_flush();
      determine_cached(work_set);
//       info("count_cached[%d] = %d, count_not_cached[%d] = %d\n", rank,
//            count_cached, rank, count_not_cached); info_flush();
//       MPI_Barrier(comm);
      //      printf("count_cached = %d\n", count_cached);
      // info("Updating G_n..."); info_flush();
      // Compute G_n
      for(int j=0; j<lmn; ++j)
        G_n[j] = 0;
      // First update the cached part...
      for(int i=0; i<count_cached; ++i)
        {
          const Qfloat *Q_i = Q.get_Q_subset(work_set[idx_cached[i]],
                                             not_work_set,lmn);
          for(int j=0; j<lmn; ++j)
            G_n[j] += Q_i[not_work_set[j]] * delta_alpha[idx_cached[i]];
        }
      // ...now update the non-cached part
      for(int i=0; i<count_not_cached; ++i)
        {
          const Qfloat *Q_i = Q.get_Q_subset(work_set[idx_not_cached[i]],
                                             not_work_set,lmn);
          for(int j=0; j<lmn; ++j)
            G_n[j] += Q_i[not_work_set[j]] * delta_alpha[idx_not_cached[i]];
        }
      //      info("done.\n"); info_flush();
      // Synchronize gradient with other processors
      //      info("Synchronizing gradient..."); info_flush();
      sync_gradient(work_set, not_work_set);
      //      info("done.\n"); info_flush();
      time = MPI_Wtime() - time;
      gradient_updating_time += time;
      if(rank == 0)
        info(" %13.2f |", time); info_flush();
      // Update alpha
      for(int i=0; i<n; ++i)
        {
          alpha[work_set[i]] = alpha_b[i];
          update_alpha_status(work_set[i]);
        }
    } // while(1)

  // Calculate rho
  si->rho = calculate_rho();
  // Calculate objective value
  {
    double v = 0;
    int i;
    for(i=0;i<l;i++)
      v += alpha[i] * (G[i] + b[i]);
    si->obj = v/2;
  }
  // Put back the solution
  {
    for(int i=0;i<l;i++)
      alpha_[i] = alpha[i];
  }
  si->upper_bound_p = Cp;
  si->upper_bound_n = Cn;
  total_time = MPI_Wtime() - total_time;
  // print timing statistics
  if(rank == 0)
    {
      info("\n");
      info("Total opt. time = %.2lf\n", total_time); info_flush();
      info("Problem setup time = %.2lf (%.2lf%%)\n", problem_setup_time,
           problem_setup_time/total_time*100); info_flush();
      info("Inner solver time = %.2lf (%.2lf%%)\n", inner_solver_time,
           inner_solver_time/total_time*100); info_flush();
      info("Gradient updating time = %.2lf (%.2lf%%)\n",
           gradient_updating_time,
           gradient_updating_time/total_time*100); info_flush();
    }
  MPI_Barrier(comm);
  info("\noptimization finished, #iter = %d\n",iter);

  // Clean up
  delete[] b;
  delete[] y;
  delete[] alpha;
  delete[] alpha_status;
  delete[] delta_alpha;
  delete[] work_status;
  delete[] work_count;
  delete[] work_set;
  delete[] not_work_set;
  delete[] G;
  delete[] Q_bb;
  delete[] QD_b;
  delete[] alpha_b;
  delete[] c;
  delete[] a;
  delete[] l_low;
  delete[] l_up;
  delete[] n_low;
  delete[] n_up;
  delete[] lmn_low;
  delete[] lmn_up;
  delete[] G_n;
  delete[] p_cache_status;
  delete[] idx_cached;
  delete[] idx_not_cached;
  delete[] nz;
}

void Solver_Parallel_SMO::determine_cached(int *work_set)
{
  count_cached = 0; count_not_cached = 0;
  // Update local part of the parallel cache status
  for(int i=0; i<l; ++i)
    {
      if(Q->is_cached(i))
        p_cache_status[rank*l + i] = CACHED;
      else
        p_cache_status[rank*l + i] = NOT_CACHED;
    }
  // Synchronize parallel cache status
  for(int k=0; k<size; ++k)
    {
      ierr = MPI_Bcast(&p_cache_status[k*l], l, MPI_CHAR, k, comm);
      CheckError(ierr);
    }
  // Smart parallel cache handling
  int next_k = 0; bool found = false;
  int next_not_cached = 0;
  for(int i=0; i<n; ++i)
    {
      if(nz[i])
        {
          for(int k=next_k; !found && k<size; ++k)
            {
              if(p_cache_status[k*l + work_set[i]] == CACHED)
                {
                  if(k == rank) // Do we have it?
                    {
                      idx_cached[count_cached] = i;
                      ++count_cached;
                    }
                  found = true;
                  next_k = k == size-1 ? 0 : k+1;
                }
            }
          for(int k=0; !found && k<next_k; ++k)
            {
              if(p_cache_status[k*l + work_set[i]] == CACHED)
                {
                  if(k == rank) // Do we have it?
                    {
                      idx_cached[count_cached] = i;
                      ++count_cached;
                    }
                  found = true;
                  next_k = k == size-1 ? 0 : k+1;
                }
            }
          if(!found) // not in any cache
            {
              if(rank == next_not_cached) // Do we have to compute it?
                {
                  idx_not_cached[count_not_cached] = i;
                  ++count_not_cached;
                }
              next_not_cached = next_not_cached == size-1 ? 0 :
                next_not_cached + 1;
            }
          found = false;
        } // if(nz[i])
    }
}

void Solver_Parallel_SMO::setup_range(int *range_low, int *range_up,
                                      int total_sz)
{
  int local_sz = total_sz/size;
  int idx_up = local_sz;
  int idx_low = 0;
  if(total_sz != 0)
    {
      for(int i=0; i<size-1; ++i)
        {
          range_low[i] = idx_low;
          range_up[i] = idx_up;
          idx_low = idx_up;
          idx_up = idx_low + local_sz + 1;
        }
      range_low[size-1] = idx_low; range_up[size-1]=total_sz;
    }
  else
    {
      for(int i=0; i<size; ++i)
        {
          range_low[i] = 0;
          range_up[i] = 0;
        }
    }
}

void Solver_Parallel_SMO::sync_gradient(int *work_set, int *not_work_set)
{
  // Synchronize G_b
  double *G_buf = new double[l];
  if(rank == 0)
    {
      for(int i=0; i<n; ++i)
        G_buf[i] = G[work_set[i]];
    }
  ierr = MPI_Bcast(G_buf, n, MPI_DOUBLE, 0, comm); CheckError(ierr);
  if(rank != 0)
    {
      for(int i=0; i<n; ++i)
        G[work_set[i]] = G_buf[i];
    }
  // Synchronize G_n
  for(int i=0; i<size; ++i)
    {
      if(rank == i)
        {
          for(int j=0; j<lmn; ++j)
            G_buf[j] = G_n[j];
        }
      ierr = MPI_Bcast(G_buf, lmn, MPI_DOUBLE, i, comm); CheckError(ierr);
      // Accumulate contributions
      for(int j=0; j<lmn; ++j)
        G[not_work_set[j]] += G_buf[j];
    }
  delete[] G_buf;
}
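The listing above leans on two MPI idioms that are easy to miss in the dense code: setup_range() hands each processor a contiguous block of indices, and the gradient is assembled by letting every rank broadcast its local contribution while all ranks accumulate. The standalone sketch below is not part of psmo_solver.cpp; block_range() and the toy buffers are assumptions introduced only for illustration, and the split is a simplified uniform one rather than the exact chunk sizes setup_range() produces.

// Hypothetical illustration only -- block_range() and the toy vectors are not
// identifiers from psmo_solver.cpp.
#include <mpi.h>
#include <cstdio>
#include <vector>

// Split [0, total) into one contiguous block per rank; the last rank takes the
// remainder. Simplified stand-in for Solver_Parallel_SMO::setup_range().
static void block_range(int total, int nranks, std::vector<int>& low, std::vector<int>& up)
{
  low.assign(nranks, 0);
  up.assign(nranks, 0);
  int chunk = total / nranks;
  for(int k = 0; k < nranks; ++k)
    {
      low[k] = k * chunk;
      up[k] = (k == nranks - 1) ? total : (k + 1) * chunk;
    }
}

int main(int argc, char** argv)
{
  MPI_Init(&argc, &argv);
  int rank, size;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  const int l = 10;                     // toy problem size
  std::vector<int> low, up;
  block_range(l, size, low, up);

  // Each rank fills only its own slice of a send buffer...
  std::vector<double> send(l, 0.0), recv(l, 0.0), G(l, 0.0);
  for(int i = low[rank]; i < up[rank]; ++i)
    send[i] = rank + 1.0;               // stand-in for a local gradient contribution

  // ...then every rank broadcasts its buffer in turn and all ranks accumulate,
  // mirroring the gradient initialization and sync_gradient loops above.
  for(int k = 0; k < size; ++k)
    {
      double* buf = (k == rank) ? send.data() : recv.data();
      MPI_Bcast(buf, l, MPI_DOUBLE, k, MPI_COMM_WORLD);
      for(int i = 0; i < l; ++i)
        G[i] += buf[i];
    }

  if(rank == 0)
    printf("G[0] = %g, G[%d] = %g\n", G[0], l - 1, G[l - 1]);

  MPI_Finalize();
  return 0;
}

Built with, e.g., mpicxx and run under mpirun, every rank ends the loop holding the same fully accumulated vector, which is the invariant the solver relies on before the next working-set selection.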
