📄 mysmo.cpp
字号:
// mysmo.cpp: implementation of the mysmo class.
//
//////////////////////////////////////////////////////////////////////
#include "stdafx.h"
#include "smo.h"
#include "mysmo.h"
#include <functional>
#include <cmath>
#include <time.h>
#include <fstream.h>
#include <iostream.h>
//////////////////////////////////////////////////////////////////////
// Construction/Destruction
//////////////////////////////////////////////////////////////////////
mysmo::mysmo(float c,float tolerance,float eps)
{
m_sigma = 1.0f; //Sigma of kernel function
m_C = c; //Penalty factor (HIGH->high penalty for wrong side)
m_tolerance = tolerance; //Tolerance in KKT condition
m_eps = eps; //Break limit
//
initCsmo();
}
mysmo::~mysmo()
{
freeCsmo();
}
void mysmo::initCsmo()
{
m_a=NULL; //Lagrange multipliers (LM)
m_w=NULL; //Weight vector
m_y=NULL; //Classification of training points
m_self_dot_product=NULL; //vector of dot prod x*x for all x
m_e=NULL; //vector of prediction error Ei
m_x=NULL; //The training points
m_testIns=NULL; //The training points
m_errInsIndex=NULL;//testing err instance's index
m_max=NULL;
m_min = NULL;
m_xNum=0; //Number of training points
m_testNum=0; //number of testing points
m_dimension=0; //dimension of points
m_end_support_i=0; //Support vectors are from 0...end_support_i
m_b = 0; //Threshold (begin at 0)
m_two_sigma_squared = 2 * m_sigma * m_sigma;
}
void mysmo::freeCsmo()
{
if(m_a)delete[] m_a;
if(m_w)delete[] m_w;
if(m_y)delete[] m_y;
if(m_self_dot_product)delete[] m_self_dot_product;
if(m_e)delete[] m_e;
if(m_x)delete[] m_x;
if(m_testIns)delete[] m_testIns;
if(m_errInsIndex)delete[] m_errInsIndex;
if(m_max)delete[] m_max;
if(m_min)delete[] m_min;
initCsmo();
}
float mysmo::drand48()
{
return rand()/float(RAND_MAX);
// return 0;
}
int mysmo::tryLM(int i1)
{
float E1,y1,a1,r1; //Functional values
y1 = m_y[i1]; //Classification of point i1
a1 = m_a[i1]; //LM of point i1
if( (a1 > 0) && (a1 < m_C) )
{ //should mean r1=0 from KKT condition
E1 = m_e[i1];
}
else
{
E1 = learned_func(i1) - y1; //Calculation of prediction error
}
r1 = y1 * E1; //r1 quantity of KKT condition
if(((r1 < -m_tolerance) && (a1 < m_C)) || ((r1 > m_tolerance) && (a1 > 0)))
{ //If KKT is violated
//Try to find suitable example LM (index i2)
int k, i2;
float tmax;
//Try boundary points with the largest difference |E1 - E2|
for( i2 = (-1), tmax = 0, k = 0; k < m_end_support_i; k++)
{
if((m_a[k] > 0) && (m_a[k] < m_C))
{ //boundary points
float E2, temp;
E2 = m_e[k];
temp = fabs(E1 - E2);
if(temp > tmax){
tmax = temp;
i2 = k;
}
}
}
if(i2 >= 0)
{
if(optimize( i1, i2 ) == 1) return 1;
}
//Try all boundary points
int k0;
for(k0 = (int)(drand48() * m_end_support_i), k = k0; k < m_end_support_i + k0; k++)
{
i2 = k % m_end_support_i;
if((m_a[i2] > 0) && (m_a[i2] < m_C))
{
if(optimize(i1,i2) == 1) return 1;
}
}
//Try the entire set
for(k0 = (int)(drand48() * m_end_support_i), k = k0; k < m_end_support_i + k0; k++){
i2 = k % m_end_support_i;
if(optimize(i1,i2) == 1) return 1;
}
}
return 0;
}
int mysmo::optimize( int i1, int i2 )
{
float y1, y2, s;
float a1o, a2o; //old LM values
float a1n, a2n; //new LM values
float E1, E2, L, H, eta;
//Assignment of values
if(i1 == i2) return 0;
a1o = m_a[i1];
a2o = m_a[i2];
y1 = m_y[i1];
y2 = m_y[i2];
if((a1o > 0) && (a1o < m_C)) E1 = m_e[i1];
else E1 = learned_func(i1) - y1;
if((a2o > 0) && (a2o < m_C)) E2 = m_e[i2];
else E2 = learned_func(i2) - y2;
s = y1 * y2;
//Computation of L and H, low and high end a2n on line segment
if( y1 == y2 ) {
float sum = a1o + a2o;
if(sum > m_C){
L = sum - m_C;
H = m_C;
}
else{
L = 0;
H = sum;
}
}
else{
float diff = a1o - a2o;
if(diff > 0){
L = 0;
H = m_C - diff;
}
else{
L = -diff;
H = m_C;
}
}
if(L == H) return 0; //Error, no line segment
//Computation of eta
float k11 = kernel(i1,i1);
float k22 = kernel(i2,i2);
float k12 = kernel(i1,i2);
eta = 2 * k12 - k11 - k22; //Computation of eta from kernel
if(eta < -0.01){
a2n = a2o + y2 * (E2 - E1) / eta; //New a2 on line segment
if( a2n < L ) a2n = L; //Lowest a2n on line segment
else if( a2n > H ) a2n = H; //Highest a2n on line segment
}
else
{ //eta~0
float c1 = eta/2;
float c2 = y2 * (E1-E2) - eta * a2o;
float Lobj = c1 * L * L + c2 * L;
float Hobj = c1 * H * H + c2 * H;
if( Lobj > Hobj + m_eps ) a2n = L;
else if( Lobj < Hobj - m_eps ) a2n = H;
else a2n = a2o;
}
if( fabs(a2n - a2o) < (m_eps * (a2n + a2o + m_eps)) )
return 0;
a1n = a1o - s * (a2n - a2o); //New a1
if(a1n < 0)
{ //Disallowed case
a2n += s * a1n;
a1n = 0; //Limit case
}
else if(a1n > m_C)
{ //Disallowed case
a2n += s * (a1n - m_C);
a1n = m_C; //Limit case
}
//Update threshold b to reflect change in a
float b1, b2, bnew, deltab;
if((a1n > 0) && (a1n < m_C))
{
bnew = m_b + E1 + y1 * (a1n - a1o) * k11 + y2 * (a2n - a2o) * k12;
}
else
{
if((a2n > 0) && (a2n < m_C))
{
bnew = m_b + E2 + y1 * (a1n - a1o) * k12 + y2 * (a2n - a2o) * k22;
}
else
{
b1 = m_b + E1 + y1 * (a1n - a1o) * k11 + y2 * (a2n - a2o) * k12;
b2 = m_b + E2 + y1 * (a1n - a1o) * k12 + y2 * (a2n - a2o) * k22;
bnew = (b1 + b2) / 2;
}
}
/* unsigned char *p;
p = (unsigned char *)&bnew;
if((p[2]==128 && p[3]==127) || (p[2]==192 && p[3]==255 ))
p=0;
if(y1==y2 && a1n==a2o && a2n==a1o)
return 0;
*/ deltab = bnew - m_b;
m_b = bnew;
//Update e vector
float t1 = y1 * (a1n - a1o);
float t2 = y2 * (a2n - a2o);
for(int i = 0; i < m_end_support_i; i++)
{
if((0 < m_a[i]) && (m_a[i] < m_C))
{
m_e[i] += t1 * kernel(i1,i) + t2 * kernel(i2,i) - deltab;
}
}
m_e[i1] = 0.0;
m_e[i2] = 0.0;
m_a[i1] = a1n; //Store the optimized pair of LMs
m_a[i2] = a2n;
return 1; //Success!
}
float mysmo::dot_product(int i1, int i2)
{
//Dot product x*y
float dot = 0.;
float *p1 = m_x + i1*m_dimension;
float *p2 = m_x + i2*m_dimension;
for(int i = 0; i < m_dimension; i++)
dot += p1[i] * p2[i];
return dot;
}
float mysmo::kernel(int i1, int i2)//The kernel function
{
float s = dot_product(i1, i2); //Dot product x*y
s *= -2;
s += m_self_dot_product[i1] + m_self_dot_product[i2];
return exp( -s / m_two_sigma_squared ); //Gaussian RBF
}
float mysmo::learned_func(int k)//The learned function w*x - b= sum(ai*yi*ki) -b
{
float s = 0.;
for(int i = 0; i < m_end_support_i; i++)
{
if(m_a[i] > 0) s += m_a[i] * m_y[i] * kernel(i,k);
}
s -= m_b;
return s;
}
int mysmo::indicator_function(float *v)//Returns 1 or -1 as class. for v
{
float dec = decision_function(v);
if(dec > 0)
return 1;
else
return -1;
}
float mysmo::decision_function(float * v)//Calculates the decision func. at v
{
float dec = 0;
float s = 0; //v - x[i]
float *p = m_x;
float temp;
for(int i = 0; i < m_xNum; i++)
{
if(m_a[i]>0)
{
s = 0;
for(int j = 0; j < m_dimension; j++)
{
temp = v[j] - p[j];
s = s + temp * temp;
}
dec += m_y[i] * m_a[i] * exp( - s / m_two_sigma_squared );
}
p = p + m_dimension;
}
dec -= m_b;
return dec;
}
int mysmo::testing(char * fileName,unsigned int number)
{
unsigned int right=0,wrong=0;
ofstream fout;
char path[100];
getFileName(path,fileName);
strcat(path,".result");
// remove(path);
ifstream fin;//(FileName,ios::in|ios::nocreate);
fin.open(fileName,ios::in|ios::nocreate);
if(!fin.is_open())
return 1;
fout.open(path);
unsigned int i,j;
float *p=new float[m_dimension];
int y,decisionY;
unsigned char flag=0;
fout<<"id\t y \t decision"<<endl;
for(i=0;i<number;i++)
{
for(j=0;j<m_dimension;j++)
{
fin>>p[j];
if(fin.eof()==3)
goto End;
if(m_max[j] != m_min[j])
p[j] = (p[j] - m_min[j]) / (m_max[j] - m_min[j]);
else
p[j] = 0;
}
fin>>y;
if(fin.eof()==3)
goto End;
decisionY = indicator_function( p );
if(decisionY ==y)
right++;
else
{
fout<<i+1<<"\t"<<y<<"\t"<<decisionY<<endl;
wrong++;
}
}
fout<<"the rigth num: "<<right<<endl;
fout<<"the error num: "<<wrong<<endl;
precision = double(right)/(right+wrong);
fout<<"the precision: "<<precision<<endl;
End:
delete[] p;
fin.close();
fout.close();
return 0;
}
int mysmo::LMToFile(char * fileName)
{
remove(fileName);
ofstream out;
out.open(fileName,ios::app);
out << "dimension is:"<<m_xNum << endl;
out << "using gauss kernel function, para is:"<< m_two_sigma_squared << endl;
out << "the b in \'wx+b\' is :"<< m_b << endl;
out << "a = " << endl;
float sum = 0;
for(unsigned int i = 0; i < m_xNum; i++)
{
out << m_a[i] << endl;
}
return 0;
}
int mysmo::allocMemory(unsigned int number,unsigned int dimession)
{
freeCsmo();
m_x = new float[number * dimession];
m_y = new float[number];
m_a = new float[number]; //Lagrange multipliers (LM)
memset(m_a,0,number*sizeof(float));
m_e = new float[number]; //vector of prediction error Ei
memset(m_e,0,number*sizeof(float));
m_self_dot_product = new float[number]; //vector of dot prod x*x for all x
m_max = new float[dimession];
m_min = new float[dimession];
return 0;
}
int mysmo::readPoint(char * FileName,unsigned int number,unsigned int dimession)
{
//dimession not contain the decision
if(FileName == NULL || dimession==0 || number == 0)
return 1;
allocMemory(number,dimession);
ifstream fin;//(FileName,ios::in|ios::nocreate);
fin.open(FileName,ios::in|ios::nocreate);
if(!fin.is_open())
return 1;
unsigned int i,j;
float *p=m_x;
float *y = m_y;
unsigned char flag=0;
for(i=0;i<number;i++)
{
for(j=0;j<dimession;j++)
{
fin>>*p;
p++;
if(fin.eof()==3)
return 1;
}
fin>>*y;
if(fin.eof()==3)
return 1;
y++;
}
fin.close();
m_xNum = number;
m_dimension = dimession;
m_end_support_i = number;
return 0;
}
void mysmo::RenormalizeData(float * instance,unsigned int number)
{
float t;
float *p;
unsigned int k,l;
for(k = 0; k < m_dimension; k++)
{ //for every dimension
m_max[k] = instance[k];
m_min[k] = instance[k];
p=instance;
for(l = 0; l < number; l++)
{ //for every point
t = p[k];
if(t > m_max[k]) m_max[k] = t;
if(t < m_min[k]) m_min[k] = t;
p = p + m_dimension;
}
p=instance;
for(l = 0; l < number; l++)
{ //for every point
if (m_max[k] != m_min[k])
p[k] = (p[k] - m_min[k]) / (m_max[k] - m_min[k]);
else
p[k] = 0;
p = p + m_dimension;
}
}
}
int mysmo::pointToFile(char * FileName,float * x ,float * y,unsigned int number)
{
if(FileName==NULL || number==0 || x ==NULL)
return 1;
ofstream fout;
fout.open(FileName);
if(!fout.is_open())
return 1;
unsigned int i,j;
for(i=0;i<number;i++)
{
for(j=0;j<m_dimension;j++)
{
fout<<" "<<*x;
x++;
}
if(y)
{
fout<<*y;
y++;
}
fout<<endl;
}
fout.close();
return 0;
}
int mysmo::createSmo()
{
//To train machine
int i,j;
//Renormalize data to [0,1]
RenormalizeData(m_x,m_xNum);
// pointToFile("normtrain.data",m_x,0,m_xNum);
//Write normalized training data to file
//self dot product
float sdp_temp = 0;
float *p = m_x;
for(i = 0; i < m_xNum; i++)
{ //calculation of x*x
m_self_dot_product[i] = 0;
for(j = 0; j < m_dimension; j++){
m_self_dot_product[i] += (*p) * (*p);
p++;
}
}
int numChanged = 0;
int examineAll = 1;
//------------------------------------------------------
//Routine to examine all the points
while(numChanged > 0 || examineAll == 1)
{
numChanged = 0;
if(examineAll == 1)
{
for(int k = 0; k < m_xNum; k++)
{
numChanged += tryLM(k);
}
}
else
{
for(int k = 0; k < m_xNum; k++)
{
if((m_a[k] != 0) && (m_a[k] != m_C))
{
numChanged += tryLM(k);
}
}
}
if(examineAll == 1) examineAll = 0;
else if(numChanged == 0) examineAll = 1;
}
//-------------------------------------------------------
//--------------------------------------------------------------------
//To classify unclassified data using trained machine
//Read in data
//--------------------------------------------------------------------
return 0;
}
byte mysmo::getFileName(char * dest,const char * source)
{
if(source==NULL)
return 1;
unsigned int i,j = strlen(source) -1;
if(j==0)
return 1;
for(i=j; i>=0; i--)
{
if(source[i]=='.')
j = i;
else if(source[i]=='\\')
{
i++;
break;
}
}
strncpy(dest,source+i,j-i);
dest[j-i]='\0';
return 0;
}
int mysmo::scaleToFile(char * FileName,float * x ,float * y,unsigned int number)
{
if(FileName==NULL || number==0 || x ==NULL)
return 1;
ofstream fout;
fout.open(FileName);
if(!fout.is_open())
return 1;
unsigned int i,j;
for(i=0;i<number;i++)
{
if(y)
{
fout<<*y;
y++;
}
for(j=0;j<m_dimension;j++)
{
fout<<" "<<j+1<<":"<<*x;
x++;
}
fout<<endl;
}
fout.close();
return 0;
}
int mysmo::scale(char * FileName,unsigned int number,unsigned int dimession)
{
if(readPoint(FileName,number,dimession))
return 1;
char fileName[200];
RenormalizeData(m_x,m_xNum);
//Write results to out-svm.data
getFileName(fileName,FileName);
strcat(fileName,".scale");
scaleToFile(fileName,m_x,m_y,m_xNum);
return 0;
}
int mysmo::skipToFile(char * FileName,float * x ,float * y,unsigned int number)
{
if(FileName==NULL || number==0 || x ==NULL)
return 1;
ofstream fout;
fout.open(FileName);
if(!fout.is_open())
return 1;
unsigned int i,j;
for(i=0;i<number;i++)
{
for(j=0;j<m_dimension;j++)
{
if (m_max[j] != m_min[j])
fout<<*x<<" ";
x++;
}
if(y)
{
fout<<*y;
y++;
}
fout<<endl;
}
fout.close();
return 0;
}
int mysmo::skip(char * FileName,unsigned int number,unsigned int dimession)
{
if(readPoint(FileName,number,dimession))
return 1;
char fileName[200];
RenormalizeData(m_x,m_xNum);
//Write results to out-svm.data
getFileName(fileName,FileName);
strcat(fileName,".skip");
skipToFile(fileName,m_x,m_y,m_xNum);
return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -