📄 smo-implement.txt

📁 C++编写的SMO算法实现！和大家共同分享！
💻 TXT
字号:
#include "stdafx.h"
#include "SVM_SMO.h"

//构造函数和析构函数
//---------------------------------------------------------------------------------
SMO::SMO(void):
 N(0),
 d(400),
 C(100.0f),
 tolerance(0.001f),
 two_sigma_squared(10.0f),
 is_test_only(true),
 first_test_i(0),
 end_support_i(-1),
 eps(0.001f) 
{
  data_file_name="svm.data";
  svm_file_name="svm.model";
  output_file_name="svm.output";
  output_see_alph="alph.data";

}
SMO::~SMO(void)
{

}
//学习方程  f(x)=alph*Y*k(x,x)-b, 是从i到N
//-------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------
float SMO::learned_func_nonlinear(int k)
{
 float sum=0;
 for(int i=0;i<end_support_i;i++)
 {
  if(alph[i]>0)
   sum+=alph[i]*target[i]*kernel_func(i,k);
 }
 sum-=b;
 return sum;
}

//径向基核函数
float SMO::kernel_func(int i,int k)
{
 float sum=dot_product_func(i,k);
 sum*=-2;
 sum+=self_dot_product[i]+self_dot_product[k];
 return exp(-sum/two_sigma_squared);
}

//向量之间的点乘函数，其中i,k，为索引
float SMO::dot_product_func(int i,int k)
{
 float dot=0;
 for(int j=0;j<d;j++)
  dot+=dense_points[i][j]*dense_points[k][j];
 return dot;
}

void SMO::precomputed_self_dot_product()
{
 self_dot_product.resize(N);
 for(int i=0;i<N;i++)
  self_dot_product[i]=dot_product_func(i,i);
}

//函数保存和读取
//---------------------------------------------------------------------------------------------
//---------------------------------------------------------------------------------------------
int SMO::read_data(istream& is)
{
 string s;
 int n_lines;
 for(n_lines=0;getline(is,s,'\n');n_lines++)
 {
  istringstream line(s);
  vector<int> v;
  int  t;
  while(line>>t)
   v.push_back(t);
  target.push_back(v.back());
  v.pop_back();
  int n=(int)v.size();
  if(v.size()!=d)
  {
   cerr<<"data file error :line"<<n_lines+1
    <<"has"<<(int)v.size()<<"attributes;should be d="<<d
    <<endl;
   exit(1);
  }
  dense_points.push_back(v);
 }
 return n_lines;
}

void SMO::write_svm(ostream& os)
{
 os<<d<<endl;                 //图像的总维数
 os<<b<<endl;    //
 os<<two_sigma_squared<<endl;
 int n_support_vectors=0;
 for(int i=0;i<end_support_i;i++)
 {
  if(alph[i]>0)
   n_support_vectors++;
 }
 os<<n_support_vectors<<endl;  //支持向量的数量
 for(int i=0;i<end_support_i;i++)
  if(alph[i]>0)
   os<<alph[i]<<endl;
 for(int i=0;i<end_support_i;i++)
  if(alph[i]>0)
  {
   for(int j=0;j<d;j++)
    os<<dense_points[i][j]<<' ';
   os<<target[i];
   os<<endl;
  }
}

void SMO::write_alph(ostream &os)
{
 for(int i=0;i<end_support_i;i++)
 {
  if(alph[i]<0)
   os<<i<<":  "<<alph[i]<<"  *****0-0-0*****  "<<endl;  //smaller than 0;
  else
   if(alph[i]==0)
    os<<i<<":  "<<alph[i]<<"  &&&&&&0000&&&&&  "<<endl;  //equal with 0;
   else 
    if(alph[i]==C)
     os<<i<<":  "<<alph[i]<<"  $$$$$CCCCC$$$$$  "<<endl;
    else
     os<<i<<":  "<<alph[i]<<endl;
 }
}


int SMO::read_svm(istream& is)
{
 is>>d;
 is>>b;
 is>>two_sigma_squared;
 int n_support_vectors;
 is>>n_support_vectors;
 alph.resize(n_support_vectors,0.);
 for(int i=0;i<n_support_vectors;i++)
 {
  is>>alph[i];
 }
 string dummy_line_to_skip_newline;
 getline(is,dummy_line_to_skip_newline,'\n');
 return read_data(is);
}

void SMO::read_in_data()
{
 int n;
 if(is_test_only)
 {
  ifstream svm_file(svm_file_name);
  end_support_i=first_test_i=n=read_svm(svm_file);
  N+=n;
 }
 if(N>0)
 {
  target.reserve(N);
  dense_points.reserve(N);
 }
 ifstream data_file(data_file_name);
 n=read_data(data_file);
 if(is_test_only)
 {
  N=first_test_i+n;
 }
 else
 {
  N=n;
  first_test_i=0;
  end_support_i=N;
 }
}

//检测和结构输出
//-----------------------------------------------------------------------------------
//-----------------------------------------------------------------------------------
void SMO::output_data()
{
 ofstream output_file(output_file_name);
 for(int i=first_test_i;i<N;i++)
 {
  output_file<<learned_func_nonlinear(i)<<" ******* "<<target[i]<<endl;
 }
}

//检测率的查询
float SMO::error_rate()
{
 int n_total=0;
 int n_error=0;
 for(int i=first_test_i;i<N;i++)
 {
  if((learned_func_nonlinear(i)>0)!=(target[i]>0))
  {
   n_error++;
  }
  n_total++;
 }
 return float(n_error)/float(n_total);
}


//更新参数
//-------------------------------------------------------------------------------------
//-------------------------------------------------------------------------------------
int SMO::takeStep(int i1, int i2) 
{ 
 int y1, y2, s;
 float delta_b;
 float alph1, alph2;     //* old_values of alpha_1, alpha_2 
 float a1, a2;           //* new values of alpha_1, alpha_2 
 float E1, E2, L, H, k11, k22, k12, eta, Lobj, Hobj;
 if (i1 == i2) 
  return 0;
 /******************************************************/
 //读取参量ｙ，Ｅ
 alph1 = alph[i1];
 y1 = target[i1];
 if (alph1 > 0 && alph1 < C)
 {
  E1 = error_cache[i1];
 }
 else 
 {
  E1 = learned_func_nonlinear(i1) - y1;
 }
 alph2 = alph[i2];
 y2 = target[i2];
 if (alph2 > 0 && alph2 < C)
 {
  E2 = error_cache[i2];
 }
 else 
 {
  E2 = learned_func_nonlinear(i2) - y2;
 }
  s = y1 * y2;
 /*******************************************************/
  //计算　Ｈ，Ｌ
  if (y1 == y2)
  {
  float gamma = alph1 + alph2;
  if (gamma > C) 
  {
           L = gamma-C;
           H = C;
   }
  else 
  {
           L = 0;
           H = gamma;
  }
     }
     else
  {
         float gamma = alph1 - alph2;
         if (gamma > 0) 
   {
             L = 0;
             H = C - gamma;
         }
         else
   {
             L = -gamma;
             H = C;
         }
     }
  if(L==H)
   return 0;
  /********************************************************/
  //计算eta＝2K12-K11-K22
 k11 = kernel_func(i1, i1);
 k12 = kernel_func(i1, i2);
 k22 = kernel_func(i2, i2);
 eta = 2 * k12 - k11 - k22;
 /********************************************************/
 //分ｅｔｃ＞和＜０，计算新的ａｌｐｈ２
 if (eta < 0) 
 {
      a2 = alph2 + y2 * (E2 - E1) / eta;
      if (a2 < L)
   {
        a2 = L;
   }
      else
   {
    if (a2 > H)
   a2 = H;
   }
    }
 else 
 {
  float c1 = eta/2;
        float c2 = y2 * (E1-E2)- eta * alph2;
        Lobj = c1 * L * L + c2 * L;
        Hobj = c1 * H * H + c2 * H;
  if (Lobj > Hobj+eps)
   a2 = L;
  else if (Lobj < Hobj-eps)
   a2 = H;
  else
   a2 = alph2;
 }

 /********************************************************/
 //计算新的ａｌｐｈ１
 if (fabs(a2-alph2) < eps*(a2+alph2+eps))
  return 0;
 a1 = alph1 - s * (a2 - alph2);
 if (a1 < 0) 
 {
  a2 += s * a1;
  a1 = 0;
 }
 else if (a1 > C) 
 {
  float t = a1-C;
  a2 += s * t;
  a1 = C;
 }
 
 /**********************************************************/
 //更新ｂ
 float b1, b2, bnew;
 if (a1 > 0 && a1 < C)
 {
  bnew = b + E1 + y1 * (a1 - alph1) * k11 + y2 * (a2 - alph2) * k12;
 }
    else 
 {
        if (a2 > 0 && a2 < C)
            bnew = b + E2 + y1 * (a1 - alph1) * k12 + y2 * (a2 - alph2) * k22;
        else 
  {
            b1 = b + E1 + y1 * (a1 - alph1) * k11 + y2 * (a2 - alph2) * k12;
            b2 = b + E2 + y1 * (a1 - alph1) * k12 + y2 * (a2 - alph2) * k22;
            bnew = (b1 + b2) / 2;
        }
    }
 delta_b = bnew - b;
    b = bnew;


 ///************************************************************/
 ////更新w
 float t1 = y1 * (a1 - alph1);
 float t2 = y2 * (a2 - alph2);  
 //   for (int i=0; i<d; i++)
 //{
 // w[i] += dense_points[i1][i] * t1 + dense_points[i2][i] * t2;
 //}


 /*************************************************************/
 //更新E
  //float t1 = y1 * (a1-alph1);
     // float t2 = y2 * (a2-alph2);
  
    for (int i=0; i<end_support_i; i++)
 {
  if (0 < alph[i] && alph[i] < C)
  {
          error_cache[i] +=  t1 * kernel_func(i1,i) + t2 * kernel_func(i2,i)-delta_b;
  }
 }
 error_cache[i1] = 0.;
 error_cache[i2] = 0.;

    /***********************************************************/
 //存储新的ａｌｐｈ１和ａｌｐｈ２
 alph[i1] = a1;   /* Store a1 in the alpha array.*/
 alph[i2] = a2;   /* Store a2 in the alpha array.*/
 return 1;

}

//---------------------------------------------------------------------------------------------------
/*以下两层循环，开始时检查所有样本，选择不符合KKT条件的两个乘子进行优化，选择成功，返回1，否则，返回0
所以成功了，numChanged必然>0，从第二遍循环时，就不从整个样本中去寻找不符合KKT条件的两个乘子进行优化，
而是从边界的乘子中去寻找，因为非边界样本需要调整的可能性更大，边界样本往往不被调整而始终停留在边界上。
如果，没找到，再从整个样本中去找，直到整个样本中再也找不到需要改变的乘子为止，此时，算法结束。*/
//---------------------------------------------------------------------------------------------------
int SMO::examineExample(int i1)
{
 float y1, alph1, E1, r1;
 y1 = (float)target[i1];
 alph1 = alph[i1];
 if (alph1 > 0 && alph1 < C)
  E1 = error_cache[i1];
 else 
  E1 = learned_func_nonlinear(i1) - y1;

 r1 = y1 * E1;
 if ((r1 < -tolerance && alph1 < C)||(r1 > tolerance && alph1 > 0))
 { 
  /////////////使用三种方法选择第二个乘子
  //1：在non-bound乘子中寻找maximum fabs(E1-E2)的样本
  //2：如果上面没取得进展,那么从随机位置查找non-boundary 样本
  //3：如果上面也失败，则从随机位置查找整个样本,改为bound样本
  if (examineFirstChoice(i1,E1))          //1
      {
           return 1;
       }
      if (examineNonBound(i1))           //2
      {
         return 1;
      }
       if (examineBound(i1))               //3
      {
         return 1;
       }
 }
 ///没有进展
     return 0;
}

int SMO::examineFirstChoice(int i1,float E1)
{
 int k,i2;
 float tmax;  
 for(i2=-1,tmax=0.0,k=0;k<end_support_i;k++)
 {
     if(alph[k]>0&&alph[k]<C)
     {
      float E2,temp;
      E2=error_cache[k];
      temp=fabs(E1-E2);
      if(temp>tmax)
      {
       tmax=temp;
       i2=k;
      }
     }
 }
   if(i2>=0)
   {
    if(takeStep(i1,i2))
     return 1;
   }
   return 0;
}

// 2：如果上面没取得进展,那么从随机位置查找non-boundary 样本
int SMO::examineNonBound(int i1)
{
  int k,k0 = rand()%end_support_i;
     int i2;
    for (k = 0; k < end_support_i; k++)
    {
     i2 = (k + k0) % end_support_i;      //从随机位开始
       if (alph[i2] > 0.0 && alph[i2] < C)
  {
   if (takeStep(i1, i2))
         {
    return 1;
   }
  }
  }
    return 0;
}

//3：如果上面也失败，则从随机位置查找整个样本,(改为bound样本)
int SMO::examineBound(int i1)
{
 int k,k0 = rand()%end_support_i;
    int i2;
   for (k = 0; k < end_support_i; k++)
 {
  i2 = (k + k0) % end_support_i;        //从随机位开始
  if (takeStep(i1, i2))
  {
   return 1;
  }
        
    }
    return 0;
}


//----------------------------------------------------------------------------------------
//----------------------------------------------------------------------------------------
void SMO::smo_main_process()
{                       
 read_in_data();    //initialize
 if(!is_test_only)
 {
  alph.resize(end_support_i,0);  
  b=0;
  error_cache.resize(N);
 }

 self_dot_product.resize(N);
    precomputed_self_dot_product();              

 if (!is_test_only) 
 {
  numChanged = 0;
  examineAll = 1;
  while (numChanged > 0 || examineAll) 
  {
   numChanged = 0;
   if (examineAll) 
   { 
    for (int k = 0; k < N; k++)
     numChanged += examineExample (k);
            }
   else 
   { 
    for (int k = 0; k < N; k++)
     if (alph[k] != 0 && alph[k] != C)
      numChanged += examineExample (k);
            }
   if (examineAll == 1)
    examineAll = 0;
   else if (numChanged == 0)
    examineAll = 1;
  }
 }
    if (!is_test_only && svm_file_name != NULL) 
 {
  ofstream svm_file(svm_file_name);
  write_svm(svm_file);
 }
 if(!is_test_only)
 {
  int non_bound_support =0;
        int bound_support =0;
        for (int i=0; i<N; i++)
  {
   if (alph[i] > 0) 
   {
    if (alph[i] < C)
                    non_bound_support++;
                else
                    bound_support++;
   }
  }
  cerr << "non_bound=" << non_bound_support << '\t';
        cerr << "bound_support=" << bound_support << endl;
 }
 if(is_test_only)
 {
        cout<<error_rate()<<endl;
  output_data();
 }

 if(!is_test_only)
 {
  ofstream alph_file(output_see_alph);
  write_alph(alph_file);
 }

}
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -