📄 loqo_solver.cpp

📁 支持向量分类算法在linux操作系统下的是实现
💻 CPP
📖 第 1 页 / 共 5 页
字号:
{  a = new LOQOfloat[m*local_n];  ierr = PLA_Matrix_create(MPIfloat, m, n, templ, PLA_ALIGN_FIRST,			   PLA_ALIGN_FIRST, &a_global); CheckError(ierr);}void Solver_Parallel_LOQO::allocate_d(){  d = new LOQOfloat[m];  ierr = PLA_Mvector_create(MPIfloat, m, 1, templ, PLA_ALIGN_FIRST,			    &d_global); CheckError(ierr);}void Solver_Parallel_LOQO::allocate_work_space(){  ierr = PLA_Mvector_create(MPIfloat, n, 1, templ, PLA_ALIGN_FIRST, &x);  CheckError(ierr);  ierr = PLA_Mvector_create(MPIfloat, n, 1, templ, PLA_ALIGN_FIRST, &dist);  CheckError(ierr);  ierr = PLA_Mvector_create(MPIfloat, n, 1, templ, PLA_ALIGN_FIRST, &g);  CheckError(ierr);  ierr = PLA_Mvector_create(MPIfloat, n, 1, templ, PLA_ALIGN_FIRST, &t);  CheckError(ierr);  ierr = PLA_Mvector_create(MPIfloat, m, 1, templ, PLA_ALIGN_FIRST, &yy);  CheckError(ierr);  ierr = PLA_Mvector_create(MPIfloat, n, 1, templ, PLA_ALIGN_FIRST, &z);  CheckError(ierr);  ierr = PLA_Mvector_create(MPIfloat, n, 1, templ, PLA_ALIGN_FIRST, &s);  CheckError(ierr);}void Solver_Parallel_LOQO::setup_a(int *work_set){  for(int i=n_low_loc; i<n_up_loc; ++i)    a[i-n_low_loc] = y[work_set[i]];  ierr = PLA_Obj_set_to_zero(a_global_view); CheckError(ierr);  ierr = PLA_API_begin(); CheckError(ierr);  ierr = PLA_Obj_API_open(a_global_view); CheckError(ierr);  ierr = PLA_API_axpy_matrix_to_global(1, local_n, plus_one, (void *)a, 1, 				       a_global_view, 0, n_low_loc);  CheckError(ierr);  ierr = PLA_Obj_API_close(a_global_view); CheckError(ierr);  ierr = PLA_API_end(); CheckError(ierr);}void Solver_Parallel_LOQO::setup_d(int *not_work_set){  d[0] = 0;  for(int i=0; i<lmn; ++i)    {      if(fabs(alpha[not_work_set[i]]) > TOL_ZERO)	d[0] -= y[not_work_set[i]]*alpha[not_work_set[i]];    }  ierr = PLA_Obj_set_to_zero(d_global); CheckError(ierr);  ierr = PLA_API_begin(); CheckError(ierr);  ierr = PLA_Obj_API_open(d_global); CheckError(ierr);  if(rank == 0)    ierr = PLA_API_axpy_vector_to_global(1, plus_one, (void *) d, 1, d_global, 					 0); CheckError(ierr);  ierr = PLA_Obj_API_close(d_global); CheckError(ierr);  ierr = PLA_API_end(); CheckError(ierr);}void Solver_Parallel_LOQO::setup_up(int *work_set){  for(int i=n_low_loc; i<n_up_loc; ++i)    up[i-n_low_loc] = get_C(work_set[i]);  ierr = PLA_Obj_set_to_zero(up_global_view); CheckError(ierr);  ierr = PLA_API_begin(); CheckError(ierr);  ierr = PLA_Obj_API_open(up_global_view); CheckError(ierr);  ierr = PLA_API_axpy_vector_to_global(local_n, plus_one, (void *) up, 1, 				       up_global_view, n_low_loc);   CheckError(ierr);  ierr = PLA_Obj_API_close(up_global_view); CheckError(ierr);  ierr = PLA_API_end(); CheckError(ierr);}void Solver_Parallel_LOQO::setup_low(){  for(int i=n_low_loc; i<n_up_loc; ++i)    low[i-n_low_loc] = 0;  ierr = PLA_Obj_set_to_zero(low_global_view); CheckError(ierr);  ierr = PLA_API_begin(); CheckError(ierr);  ierr = PLA_Obj_API_open(low_global_view); CheckError(ierr);  ierr = PLA_API_axpy_vector_to_global(local_n, plus_one, (void *) low, 1, 				       low_global_view, n_low_loc);   CheckError(ierr);  ierr = PLA_Obj_API_close(low_global_view); CheckError(ierr);  ierr = PLA_API_end(); CheckError(ierr);}void Solver_Parallel_LOQO::setup_problem(int *work_set, int *not_work_set){  // Note: We have to store Q_bb in column major format  // since this is storage layout expected by PLAPACK-API.  int start_i = 0;  for(int i=0; i<n; ++i, ++start_i)    {      if(work_set[i] >= l_low_loc)	break;    }  num_rows = 0;  for(int i=start_i; work_set[i] < l_up_loc && i<n; ++i)    ++num_rows;  for(int i=start_i; work_set[i] < l_up_loc && i<n; ++i)    //for(int i=n_low_loc; i<n_up_loc; ++i)    {      const Qfloat *Q_i = Q->get_Q_subset(work_set[i], work_set, n);      //c[i-n_low_loc] = G[work_set[i]];      c[i-start_i] = G[work_set[i]];      for(int j=0; j<n; ++j)	{	  //Q_bb[j*local_n+(i-n_low_loc)] = (LOQOfloat) Q_i[work_set[j]];	  Q_bb[j*num_rows+(i-start_i)] = (LOQOfloat) Q_i[work_set[j]];	  if(alpha[work_set[j]] > TOL_ZERO)	 // c[i-n_low_loc] -= Q_bb[j*local_n+(i-n_low_loc)]*alpha[work_set[j]];	    c[i-start_i] -= Q_bb[j*num_rows+(i-start_i)]*alpha[work_set[j]];	}    }  ierr = MPI_Barrier(comm); CheckError(ierr);  // Setup distributed objects.  ierr = PLA_Obj_set_to_zero(Q_bb_global_view); CheckError(ierr);  ierr = PLA_Obj_set_to_zero(c_global_view); CheckError(ierr);  ierr = PLA_API_begin(); CheckError(ierr);  ierr = PLA_Obj_API_open(Q_bb_global_view); CheckError(ierr);  ierr = PLA_Obj_API_open(c_global_view); CheckError(ierr);//   ierr = PLA_API_axpy_matrix_to_global(local_n, n, plus_one, (void *) Q_bb,// 				       local_n, Q_bb_global_view, n_low_loc, // 				       0); CheckError(ierr);  ierr = PLA_API_axpy_matrix_to_global(num_rows, n, plus_one, (void *) Q_bb,				       num_rows, Q_bb_global_view, start_i, 				       0); CheckError(ierr);//   ierr = PLA_API_axpy_vector_to_global(local_n, plus_one, (void *) c, 1, // 				       c_global_view, n_low_loc); //   CheckError(ierr);  ierr = PLA_API_axpy_vector_to_global(num_rows, plus_one, (void *) c, 1, 				       c_global_view, start_i);   CheckError(ierr);  ierr = PLA_Obj_API_close(Q_bb_global_view); CheckError(ierr);  ierr = PLA_Obj_API_close(c_global_view); CheckError(ierr);  ierr = PLA_API_end(); CheckError(ierr);  setup_a(work_set);  setup_d(not_work_set);}int Solver_Parallel_LOQO::solve_inner(int *work_set){  int result = -1;  double sigdig;  double epsilon_loqo = 1e-10;  double margin;  int iteration;  for(margin=init_margin, iteration=init_iter;       margin <= 0.9999999 && result != OPTIMAL_SOLUTION;)    {      sigdig = -log10(opt_precision);      // run pr_loqo      result = parallel_loqo(c_global_view, Q_bb_global_view, a_global_view, 			     d_global, low_global_view, up_global_view, 			     &x_view, &g_view, &t_view, 			     &yy, &z_view, &s_view, &dist_view, 3,			     sigdig, iteration, margin, up[0]/4, 0);      // Make solution known to all processors, local access      // occurs via alpha_new dist_ and dual      ierr = PLA_Copy(x_view, x_loc_view); CheckError(ierr);      ierr = PLA_Copy(dist_view, dist_loc_view); CheckError(ierr);      ierr = PLA_Copy(yy, yy_loc); CheckError(ierr);      // TODO: What happens if we have a choldc problem in the parallel      // version? Should we implement Manteuffel shifting or find a       // workaround here?      if(result != OPTIMAL_SOLUTION)	{	  // increase number of iterations	  iteration += 2000;	  init_iter += 10;	  // reduce precision	  opt_precision *= 10.0;	  info("NOTICE: Reducing precision of PR_LOQO!\n"); info_flush();	}    }  // Check precision of alphas  for(int i=0; i<n; ++i)    {      // TODO: Should use up[i] and low[i], but they are not global      if(alpha_new[i] < get_C(i)-epsilon_loqo && dist_[i] < -eps)	epsilon_loqo = 2*(get_C(i)-alpha_new[i]);      else if(alpha_new[i] > 0+epsilon_loqo && dist_[i] > eps)	epsilon_loqo = 2*alpha_new[i];    }  info("Using epsilon_loqo= %g\n", epsilon_loqo); info_flush();  // Clip alphas to bounds  for(int i=0; i<n; ++i)    {      if(fabs(alpha_new[i]) < epsilon_loqo)	{	  alpha_new[i] = 0;	}      if(fabs(alpha_new[i]-get_C(i)) < epsilon_loqo)	{	  alpha_new[i] = get_C(i);	}    }  // Compute obj after optimization  double obj_after = 0.0;  int start_i = 0;  for(int i=0; i<n; ++i, ++start_i)    {      if(work_set[i] >= l_low_loc)	break;    }  for(int i=start_i; work_set[i] < l_up_loc && i<n; ++i)  //   for(int i=n_low_loc; i<n_up_loc; ++i)    {      //      obj_after += alpha_new[i]*c[i-n_low_loc];      obj_after += alpha_new[i]*c[i-start_i];      for(int j=0; j<n; ++j)// 	obj_after += 0.5*alpha_new[i]*Q_bb[j*local_n+(i-n_low_loc)]// 	  *alpha_new[j];	obj_after += 0.5*alpha_new[i]*Q_bb[j*num_rows+(i-start_i)]	  *alpha_new[j];    }  // Get local changes for obj_after from other processors  double obj_after_loc=0.0;  double obj_after_other=0.0;  for(int i=0; i<size; ++i)    {      if(i == rank)	ierr = MPI_Bcast(&obj_after, 1, MPI_DOUBLE, i, comm);      else	{	  ierr = MPI_Bcast(&obj_after_loc, 1, MPI_DOUBLE, i, comm);	  obj_after_other += obj_after_loc;	}    }  obj_after += obj_after_other;  printf("obj after/before = %g / %g\n", obj_after, obj_before); fflush(stdout);  printf("delta a/b = %g\n", obj_after-obj_before); fflush(stdout);  // Check for progress  if(obj_after >= obj_before)    {      // Increase precision      opt_precision /= 100.0;      ++precision_violations;      info("NOTICE: Increasing Precision of PR_LOQO.\n"); info_flush();    }  if(precision_violations > 500)    {      // Relax stopping criterion      eps *= 10.0;      precision_violations = 0;      info("WARNING: Relaxing epsilon on KKT-Conditions.\n"); info_flush();    }  return result;}void Solver_Parallel_LOQO::Solve(int l, const QMatrix& Q, const double *b_, 				 const schar *y_, double *alpha_, double Cp, 				 double Cn, double eps, SolutionInfo* si, 				 int shrinking){  double total_time = MPI_Wtime();  double problem_setup_time = 0;  double inner_solver_time = 0;  double gradient_updating_time = 0;  double time;  // Initialization  this->l = l;  this->Q = &Q;  this->si = si;  clone(b,b_,l);  clone(y,y_,l);  clone(alpha,alpha_,l);  this->Cp = Cp;  this->Cn = Cn;  this->eps = eps;  this->lmn = l - n;  int *work_set = new int[n];  int *not_work_set = new int[l];  double *delta_alpha = new double[n];  this->G_n = new double[lmn];  this->p_cache_status = new char[size*l];  // Setup alpha status and work status  {    alpha_status = new char[l];    work_status = new char[l];    work_count = new int[l];    for(int i=0; i<l; ++i)      {	update_alpha_status(i);	work_status[i] = WORK_N;	work_count[i] = -1;	for(int k=0; k<size; ++k)	  p_cache_status[k*size+i] = NOT_CACHED;      }  }  // Setup local index ranges  this->l_low = new int[size];  this->l_up = new int[size];  this->n_low = new int[size];  this->n_up = new int[size];  this->lmn_low = new int[size];  this->lmn_up = new int[size];  setup_range(l_low, l_up, l);  setup_range(n_low, n_up, n);  setup_range(lmn_low, lmn_up, lmn);  this->l_low_loc = l_low[rank];  this->l_up_loc = l_up[rank];  this->n_low_loc = n_low[rank];  this->n_up_loc = n_up[rank];  this->lmn_up_loc = lmn_up[rank];  this->lmn_low_loc = lmn_low[rank];  this->local_l = l_up_loc - l_low_loc;  this->local_n = n_up_loc - n_low_loc;  this->local_lmn = lmn_up_loc - lmn_low_loc;  // Setup gradient  {    info("Initializing gradient..."); info_flush();    G = new double[l];    double *G_send = new double[l];    double *G_recv = new double[l];    for(int i=0; i<l; ++i)      {	G[i] = b[i];	G_send[i] = 0;      }    // Compute local portion of gradient    for(int i=l_low_loc; i<l_up_loc; ++i)      { 	if(!is_lower_bound(i)) 	  { 	    const Qfloat *Q_i = Q.get_Q(i,l); 	    double alpha_i = alpha[i]; 	    for(int j=0; j<l; ++j) 	      G_send[j] += alpha_i * Q_i[j];	  }      }    // Get contributions from other processors    for(int k=0; k<size; ++k)      {	if(rank == k)	  {	    ierr = MPI_Bcast(G_send, l, MPIfloat, k, comm);	    CheckError(ierr);	    for(int i=0; i<l; ++i)	      G[i] += G_send[i];	  }	else	  {	    ierr = MPI_Bcast(G_recv, l, MPIfloat, k, comm);	    CheckError(ierr);	    for(int i=0; i<l; ++i)	      G[i] += G_recv[i];	  }      }    delete[] G_recv;    delete[] G_send;//     for(int i=0; i<l; ++i)//       {//  	if(!is_lower_bound(i))//  	  {// 	    const Qfloat *Q_i = Q.get_Q(i,l); // 	    double alpha_i = alpha[i];// 	    for(int j=0; j<l; ++j)// 	      {// 		G[j] += alpha_i * Q_i[j];// 	      }//  	  }//       }    info("done.\n"); info_flush();  }  // Allocate local & global space for problem setup  //Q_bb = new LOQOfloat[local_n * n];  Q_bb = new LOQOfloat[n*n];  //   c = new LOQOfloat[local_n];  c = new LOQOfloat[n];  up = new LOQOfloat[local_n];  low = new LOQOfloat[local_n];  allocate_a();  allocate_d();  allocate_work_space();  ierr = PLA_Matrix_create(MPIfloat, n, n, templ, PLA_ALIGN_FIRST,			   PLA_ALIGN_FIRST, &Q_bb_global); CheckError(ierr);  ierr = PLA_Mvector_create(MPIfloat, n, 1, templ, PLA_ALIGN_FIRST, &c_global);  CheckError(ierr);  ierr = PLA_Mvector_create(MPIfloat, n, 1, templ, PLA_ALIGN_FIRST, &up_global);  CheckError(ierr);  ierr = PLA_Mvector_create(MPIfloat, n, 1, templ, PLA_ALIGN_FIRST, &low_global);  CheckError(ierr);  ierr = PLA_Mscalar_create(MPIfloat, PLA_ALL_ROWS, PLA_ALL_COLS, n, 1, templ,			    &x_loc); CheckError(ierr);  ierr = PLA_Mscalar_create(MPIfloat, PLA_ALL_ROWS, PLA_ALL_COLS, n, 1, templ,			    &dist_loc); CheckError(ierr);  ierr = PLA_Mscalar_create(MPIfloat, PLA_ALL_ROWS, PLA_ALL_COLS, m, 1, templ,			    &yy_loc); CheckError(ierr);  ierr = PLA_Mscalar_create(MPIfloat, PLA_ALL_ROWS, PLA_ALL_COLS, 1, 1, templ,			    &plus_one); CheckError(ierr);  ierr = PLA_Obj_set_to_one(plus_one); CheckError(ierr);  iter=0;  // Optimization loop  while(1)    {      // Only one processor does the working set selection.      int status = 0;      if(rank == 0)	{	  if(iter > 0)	    {	      status = select_working_set(work_set, not_work_set);	    }	  else	    {	      init_working_set(work_set, not_work_set);	    }	}            // Send status to other processors.      ierr = MPI_Bcast(&status, 1, MPI_INT, 0, comm);      // Now check for optimality      if(status != 0)	break;      // Send new opt_precision, as select_working_set might have      // changed it.      ierr = MPI_Bcast(&opt_precision, 1, MPI_DOUBLE, 0, comm);      // Send new working set size and working set to other processors.      ierr = MPI_Bcast(&n, 1, MPI_INT, 0, comm);      ierr = MPI_Bcast(work_set, n, MPI_INT, 0, comm);      lmn = l - n;      ierr = MPI_Bcast(not_work_set, lmn, MPI_INT, 0, comm);//       info("working set proc[%d]=", rank);//       for(int i=0; i<n; ++i)// 	info(" %d", work_set[i]);//       info("\n"); info_flush();//       info("not workset proc[%d]=", rank);//       for(int i=0; i<lmn; ++i)// 	info(" %d", not_work_set[i]);//       info("\n"); info_flush();      // Recompute ranges, as n and lmn might have changed      setup_range(n_low, n_up, n);      this->n_low_loc = n_low[rank];      this->n_up_loc = n_up[rank];      setup_range(lmn_low, lmn_up, lmn);      this->lmn_low_loc = lmn_low[rank];      this->lmn_up_loc = lmn_up[rank];      this->local_n = n_up_loc - n_low_loc;      this->local_lmn = lmn_up_loc - lmn_low_loc;      // Create new views into existing objs, as n might      // have changed      ierr = PLA_Obj_view(Q_bb_global, n, n, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &Q_bb_global_view);      CheckError(ierr);      ierr = PLA_Obj_view(c_global, n, 1, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &c_global_view);      CheckError(ierr);      ierr = PLA_Obj_view(a_global, m, n, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &a_global_view);      CheckError(ierr);      ierr = PLA_Obj_view(up_global, n, 1, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &up_global_view);      CheckError(ierr);      ierr = PLA_Obj_view(low_global, n, 1, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &low_global_view);      CheckError(ierr);      ierr = PLA_Obj_view(x, n, 1, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &x_view);      CheckError(ierr);      ierr = PLA_Obj_view(dist, n, 1, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &dist_view);      CheckError(ierr);      ierr = PLA_Obj_view(g, n, 1, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &g_view);      CheckError(ierr);      ierr = PLA_Obj_view(t, n, 1, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &t_view);      CheckError(ierr);      ierr = PLA_Obj_view(z, n, 1, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &z_view);      CheckError(ierr);      ierr = PLA_Obj_view(s, n, 1, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &s_view);      CheckError(ierr);      ierr = PLA_Obj_view(x_loc, n, 1, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &x_loc_view);      CheckError(ierr);      ierr = PLA_Obj_view(dist_loc, n, 1, PLA_ALIGN_FIRST,			  PLA_ALIGN_FIRST, &dist_loc_view);      CheckError(ierr);      void *addr_buf = NULL;
💿 文件大小 68 K
👤 上传用户 dingjuan_01
📂 所属分类 Linux/Unix编程
🏷️ 相关标签

#linux #向量 #分类算法 #操作系统
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -