📄 loqo_solver.cpp

📁 支持向量分类算法在linux操作系统下的是实现
💻 CPP
📖 第 1 页 / 共 5 页
字号:
      ierr = PLA_Obj_local_buffer(x_loc_view, (void **) &addr_buf);      CheckError(ierr);      alpha_new = (LOQOfloat *) addr_buf;      ierr = PLA_Obj_local_buffer(yy_loc, (void **) &addr_buf);      CheckError(ierr);      dual = (LOQOfloat *) addr_buf;      ierr = PLA_Obj_local_buffer(dist_loc_view, (void **) &addr_buf);      dist_ = (LOQOfloat *) addr_buf;      ++iter;      // Setup problem for parallel LOQO solver      time = MPI_Wtime();      info("setup_problem...");      setup_problem(work_set, not_work_set);      info("done.\n"); info_flush();      info("setup_up...");      setup_up(work_set);      info("done.\n"); info_flush();      info("setup_low...");      setup_low();      info("done.\n"); info_flush();      //print_problem();      time = MPI_Wtime() - time;      problem_setup_time += time;      // Compute obj before optimization      info("Computing obj before..."); info_flush();      obj_before = 0.0;      int start_i = 0;      for(int i=0; i<n; ++i, ++start_i)	{	  if(work_set[i] >= l_low_loc)	    break;	}      //      for(int i=n_low_loc; i<n_up_loc; ++i)      for(int i=start_i; work_set[i] < l_up_loc && i<n; ++i)	{	  //obj_before += alpha[work_set[i]]*c[i-n_low_loc];	  obj_before += alpha[work_set[i]]*c[i-start_i]; 	  for(int j=0; j<n; ++j)//  	    obj_before += 0.5*alpha[work_set[i]]*Q_bb[local_n*j+(i-n_low_loc)]*// 	      alpha[work_set[j]]; 	    obj_before += 0.5*alpha[work_set[i]]*Q_bb[num_rows*j+(i-start_i)]*	      alpha[work_set[j]];	}      // Get local changes for obj_before from other processors      double obj_before_loc=0.0;      double obj_before_other=0.0;      for(int i=0; i<size; ++i)	{	  if(i == rank)	    ierr = MPI_Bcast(&obj_before, 1, MPI_DOUBLE, i, comm);	  else	    {	      ierr = MPI_Bcast(&obj_before_loc, 1, MPI_DOUBLE, i, comm);	      obj_before_other += obj_before_loc;	    }	}      obj_before += obj_before_other;      printf("obj before = %g\n", obj_before); fflush(stdout);      info("done.\n"); info_flush();      // Run solver      time = MPI_Wtime();      status = solve_inner(work_set);      time = MPI_Wtime() - time;      inner_solver_time += time;      // Update gradient.      time = MPI_Wtime();      int *nz = new int[n];      for(int i=0; i<n; ++i)	{	  delta_alpha[i] = alpha_new[i] - alpha[work_set[i]];	  if(fabs(delta_alpha[i]) > TOL_ZERO)	    nz[i] = 1; 	  else	    nz[i] = 0;	}      info("Updating G_b..."); info_flush();      // Compute G_b      for(int i=start_i; work_set[i] < l_up_loc && i<n; ++i)      // for(int i=n_low_loc; i<n_up_loc; ++i)	{ 	  for(int j=0; j<n; ++j)	    { 	      if(nz[j])// 		G[work_set[i]] += Q_bb[j*local_n+(i-n_low_loc)] * // 		  delta_alpha[j];		G[work_set[i]] += Q_bb[j*num_rows+(i-start_i)] * 		  delta_alpha[j];	    }	}      info("done.\n"); info_flush();      // Update local part of the parallel cache status      for(int i=0; i<l; ++i)	{	  if(Q.is_cached(i))	    p_cache_status[rank*size + i] = CACHED;	  else	    p_cache_status[rank*size + i] = NOT_CACHED;	}      // Synchronize parallel cache status      for(int k=0; k<size; ++k)	{	  ierr = MPI_Bcast(&p_cache_status[k*size], l, MPI_CHAR, k, comm); 	  CheckError(ierr);	  	}//       printf("p_cache_status %d =",rank);//       for(int k=0; k<size; ++k)// 	{// 	  for(int i=0; i<l; ++i)// 	    {// 	      printf(" %d", p_cache_status[k*size+i]);// 	    }// 	  printf("\n");// 	}//       printf("\n");      // Smart parallel cache handling      int *idx_cached = new int[n];      int *idx_not_cached = new int[n];      int count_cached = 0; int count_not_cached = 0;      int next_k = 0; bool found = false;      int next_not_cached = 0;      for(int i=0; i<n; ++i)	{	  if(nz[i])	    {	      for(int k=next_k; !found && k<size; ++k)		{		  if(p_cache_status[k*size + work_set[i]] == CACHED)		    {		      if(k == rank) // Do we have it?			{			  idx_cached[count_cached] = i;			  ++count_cached;			}		      found = true;		      next_k = k == size-1 ? 0 : k+1;		    }		}	      for(int k=0; !found && k<next_k; ++k)		{		  if(p_cache_status[k*size + work_set[i]] == CACHED)		    {		      if(k == rank) // Do we have it?			{			  idx_cached[count_cached] = i;			  ++count_cached;			}		      found = true;		      next_k = k == size-1 ? 0 : k+1;		    } 		}	      if(!found) // not in any cache		{		  if(rank == next_not_cached) // Do we have to compute it?		    {		      idx_not_cached[count_not_cached] = i;		      ++count_not_cached;		    }		  next_not_cached = next_not_cached == size-1 ? 0 : 		    next_not_cached + 1;		}	      found = false;	    } // if(nz[i])	}      info("Updating G_n..."); info_flush();      // Compute G_n      for(int j=0; j<lmn; ++j)	G_n[j] = 0;//       printf("idx_cached %d =", rank);//       for(int i=0; i<count_cached; ++i)// 	printf(" %d", idx_cached[i]);//       printf("\n");//       printf("idx_not_cached %d =", rank);//       for(int i=0; i<count_not_cached; ++i)// 	printf(" %d", idx_not_cached[i]);//       printf("\n");      // First update the cached part...      for(int i=0; i<count_cached; ++i)	{	  const Qfloat *Q_i = Q.get_Q_subset(work_set[idx_cached[i]],					      not_work_set,lmn);	  for(int j=0; j<lmn; ++j)	    G_n[j] += Q_i[not_work_set[j]] * delta_alpha[idx_cached[i]];	}      // now update the non-cached part      for(int i=0; i<count_not_cached; ++i)	{	  const Qfloat *Q_i = Q.get_Q_subset(work_set[idx_not_cached[i]],					      not_work_set,lmn);	  for(int j=0; j<lmn; ++j)	    G_n[j] += Q_i[not_work_set[j]] * delta_alpha[idx_not_cached[i]];	}//       for(int i=start_i; work_set[i] < l_up_loc && i<n; ++i)// 	 //      for(int i=n_low_loc; i<n_up_loc; ++i)// 	{// 	  if(nz[i]){// 	    const Qfloat *Q_i = Q.get_Q_subset(work_set[i],not_work_set,lmn);// 	    for(int j=0; j<lmn; ++j)// 	      G_n[j] += Q_i[not_work_set[j]] * delta_alpha[i];// 	  }// 	}            info("done.\n"); info_flush();      delete[] idx_cached;      delete[] idx_not_cached;      delete[] nz;      time = MPI_Wtime() - time;      gradient_updating_time += time;      // Synchronize gradient with other processors      info("Synchronizing gradient..."); info_flush();      sync_gradient(work_set, not_work_set);      info("done.\n"); info_flush();//       info("synced G proc[%d]=",rank);//       for(int i=0; i<l; ++i)// 	info(" %g", G[i]);//       info("\n");//       info_flush();      // Update alpha      for(int i=0; i<n; ++i)	{	  alpha[work_set[i]] = alpha_new[i];	  update_alpha_status(work_set[i]);	}    } // while(1)  // Calculate rho  si->rho = calculate_rho();  // Calculate objective value  {    double v = 0;    int i;    for(i=0;i<l;i++)      v += alpha[i] * (G[i] + b[i]);    si->obj = v/2;  }  // Put back the solution  {//     printf("alpha = ");    for(int i=0;i<l;i++)      {	alpha_[i] = alpha[i];// 	printf(" %g\n",alpha[i]);      }//     printf("\n");  }  si->upper_bound_p = Cp;  si->upper_bound_n = Cn;  info("\noptimization finished, #iter = %d\n",iter);  total_time = MPI_Wtime() - total_time;  // print timing statistics  if(rank == 0)    {      info("Total opt. time = %lf\n", total_time); info_flush();      info("Problem setup time = %lf (%lf%%)\n", problem_setup_time,	   problem_setup_time/total_time*100); info_flush();      info("Inner solver time = %lf (%lf%%)\n", inner_solver_time,	   inner_solver_time/total_time*100); info_flush();      info("Gradient updating time = %lf (%lf%%)\n", gradient_updating_time,	   gradient_updating_time/total_time*100); info_flush();    }  // Clean up  delete[] b;  delete[] y;  delete[] alpha;  delete[] alpha_status;  delete[] delta_alpha;  delete[] work_status;  delete[] work_count;  delete[] work_set;  delete[] not_work_set;  delete[] G;  delete[] Q_bb;  delete[] c;  delete[] up;  delete[] low;  delete[] a;  delete[] d;  delete[] l_low;  delete[] l_up;  delete[] n_low;  delete[] n_up;  delete[] lmn_low;  delete[] lmn_up;  delete[] G_n;  delete[] p_cache_status;  ierr = PLA_Obj_free(&Q_bb_global); CheckError(ierr);  ierr = PLA_Obj_free(&c_global); CheckError(ierr);  ierr = PLA_Obj_free(&up_global); CheckError(ierr);  ierr = PLA_Obj_free(&low_global); CheckError(ierr);  ierr = PLA_Obj_free(&a_global); CheckError(ierr);  ierr = PLA_Obj_free(&d_global); CheckError(ierr);  ierr = PLA_Obj_free(&x_loc); CheckError(ierr);  ierr = PLA_Obj_free(&x); CheckError(ierr);  ierr = PLA_Obj_free(&g); CheckError(ierr);  ierr = PLA_Obj_free(&t); CheckError(ierr);  ierr = PLA_Obj_free(&yy); CheckError(ierr)  ierr = PLA_Obj_free(&yy_loc); CheckError(ierr);  ierr = PLA_Obj_free(&z); CheckError(ierr);  ierr = PLA_Obj_free(&s); CheckError(ierr);}void Solver_Parallel_LOQO::sync_gradient(int *work_set, int *not_work_set){  int start_i = 0;  for(int i=0; i<n; ++i, ++start_i)    {      if(work_set[i] >= l_low_loc)	break;    }  // Synchronize G_b  //  double *G_send = new double[local_n];  double *G_send = new double[num_rows];  double *G_recv = new double[n];  int count=0;  for(int i=start_i; work_set[i] < l_up_loc && i<n; ++i)    //for(int i=n_low_loc; i<n_up_loc; ++i)    {      G_send[count] = G[work_set[i]];      ++count;    }//   printf("G_send = ");//   for(int i=start_i; work_set[i] < l_up_loc && i<n; ++i)//     printf(" %g", G_send[i]);//   printf("\n");//   printf("count = %d\n", count);//   printf("num_rows = %d\n", num_rows);  int *sendcounts = new int[size];  int *sdispls = new int[size];  for(int i=0; i<size; ++i)    {      //      sendcounts[i] = local_n;      sendcounts[i] = num_rows;      sdispls[i] = 0;    }  int *recvcounts = new int[size];  int *rdispls = new int[size];  for(int k=0; k<size; ++k)    {      int start_i_other = 0;      for(int i=0; i<n; ++i, ++start_i_other)	if(work_set[i] >= l_low[k])	  break;      int num_rows_other = 0;      for(int i=start_i_other; work_set[i] < l_up[k] && i<n; ++i)	++num_rows_other;      recvcounts[k] = num_rows_other;      if(k == 0)	rdispls[k] = 0;      else	rdispls[k] = rdispls[k-1] + recvcounts[k-1];    }//   recvcounts[0] = n_up[0] - n_low[0];//   rdispls[0] = 0;//   for(int i=1; i<size; ++i)//     {//       recvcounts[i] = n_up[i] - n_low[i];//       rdispls[i] = rdispls[i-1] + recvcounts[i-1];//     }  ierr = MPI_Alltoallv(G_send, sendcounts, sdispls, MPI_DOUBLE,		       G_recv, recvcounts, rdispls, MPI_DOUBLE, comm);  CheckError(ierr);  // Update local G_b  for(int k=0; k<size; ++k)     {      if(k != rank)	{	  int start_i_other = 0;	  for(int i=0; i<n; ++i, ++start_i_other)	    if(work_set[i] >= l_low[k])	      break;	  // G_b	  for(int i=start_i_other; work_set[i] < l_up[k] && i<n; ++i)	    G[work_set[i]] = G_recv[rdispls[k]+(i-start_i_other)];// 	  for(int j=n_low[i]; j<n_up[i]; ++j)// 	    G[work_set[j]] = G_recv[rdispls[i]+(j-n_low[i])];	}    }  delete[] G_send;  delete[] G_recv;  delete[] sendcounts;  delete[] sdispls;  delete[] recvcounts;  delete[] rdispls;  double *G_buf = new double[lmn];  // Synchronize G_n  for(int i=0; i<size; ++i)    {      if(rank == i)	{	  for(int j=0; j<lmn; ++j)	    G_buf[j] = G_n[j];	}      ierr = MPI_Bcast(G_buf, lmn, MPI_DOUBLE, i, comm); CheckError(ierr);      // Accumulate contributions      for(int j=0; j<lmn; ++j)	G[not_work_set[j]] += G_buf[j];    }  delete[] G_buf;}int Solver_LOQO::select_working_set(int *work_set, int *not_work_set){ printf("selecting working set...");  // reset work status  n = n_old;  for(int i=0; i<l; ++i)      work_status[i] = WORK_N;  double Gmax1 = -INF;		// max { -y_i * grad(f)_i | i in I_up(\alpha) }  double Gmax2 = -INF;		// max { y_i * grad(f)_i | i in I_low(\alpha) }  for(int i=0; i<l; ++i)    {      if(!is_upper_bound(i))	{	  if(y[i] == +1)	    {	      if(-G[i] > Gmax1)		Gmax1 = -G[i];	    }	  else	    {	      if(-G[i] > Gmax2)		Gmax2 = -G[i];	    }	}      if(!is_lower_bound(i))	{	  if(y[i] == +1)	    {	      if(G[i] > Gmax2)		Gmax2 = G[i];	    }	  else	    {	      if(G[i] > Gmax1)		Gmax1 = G[i];	    }	}    }  // check for optimality  printf("Gmax1 + Gmax2 = %g < %g\n", Gmax1+Gmax2,eps);  if(Gmax1 + Gmax2 < eps)      return 1;  // Compute yG  double *yG = new double[l];  int *pidx = new int[l];  for(int i=0; i<l; ++i)    {      if(y[i] == +1)	yG[i] = G[i];      else	yG[i] = -G[i];      pidx[i] = i;    }  quick_sort(yG, pidx, 0, l-1);//   printf("yG = ");//   for(int i=0; i<l; ++i)//     printf(" %g",yG[i]);//   printf("\n");//   printf("pidx = ");//   for(int i=0; i<l; ++i)//     printf(" %d",pidx[i]);//   printf("\n");  int top=l-1; int bot=0; int count=0;  // Select a full set initially  int nselect = iter == 0 ? n : q;  while(top > bot && count < nselect)    {      while(!(is_free(pidx[top]) 	      || (is_upper_bound(pidx[top]) && y[pidx[top]] == +1)	      || (is_lower_bound(pidx[top]) && y[pidx[top]] == -1)	      ))	--top;      while(!(is_free(pidx[bot])	      || (is_upper_bound(pidx[bot]) && y[pidx[bot]] == -1)	      || (is_lower_bound(pidx[bot]) && y[pidx[bot]] == +1)	      ))	++bot;      if(top > bot)
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -