// File: gainofquote.cpp
/*********************************************************
// This program computes the information gain of each attribute
// of a training data set. Every record has 8 attributes, and its
// fields are delimited by ','.
//
// Input : the data-set file (note: copy the file to the root of drive D:)
//
// Output: the information gain of the data set's attributes
//
// By Peisen Yuan
//
// 2006-1-3
///////////////////////////////////////////////////////////////////////*/
#include "cmath"
#include "cstdio"
#include "cstdlib"
#include "cstring"
#include "fstream"
#include "iomanip"
#include "iostream"
using namespace std;
// One node of a singly linked list kept per attribute column.
// The first node of each list (an element of the AttrList array in main)
// stores the attribute's name in `data`; every node appended after it stores
// one distinct value text seen in that column together with its tallies.
struct AttrNode {
    char data[15];   // NUL-terminated attribute name (list head) or value text
    int KindA;       // occurrences seen in records labelled 'A'
    int KindS;       // occurrences seen in records labelled 'S'
    int KindI;       // occurrences seen in records labelled 'I'
    int count;       // total occurrences of this value
    AttrNode *next;  // following node, NULL at the end of the list
};
///////////////
// Result record for one attribute: its name and the information gain
// computed for it (main sorts an array of these by `quot`).
struct Node {
    char Name[10];  // NUL-terminated attribute name (at most 9 characters)
    double quot;    // information gain of the attribute
};
///////////////
int main()
{
//////////////////////
int NumOfExample =0;
int NumA,NumI,NumS;
NumA=NumI=NumS=0;
int GetWord(char *buf,char dlim,int index,char *RetChar);
AttrNode AttrList[8];
AttrNode *pAttr,*ptr;
for(int n=0;n<8;n++)//////header of list ,initial header
{
AttrList[n].KindA=AttrList[n].KindS=AttrList[n].KindI=0;
AttrList[n].next=NULL;
AttrList[n].count=0;
}
strcpy(AttrList[0].data,"L_CORE");
strcpy(AttrList[1].data,"L_SURF");
strcpy(AttrList[2].data,"L_O2");
strcpy(AttrList[3].data,"L_BP");
strcpy(AttrList[4].data,"SURF_STBL");
strcpy(AttrList[5].data,"CORE_STBL");
strcpy(AttrList[6].data,"BP_STBL");
strcpy(AttrList[7].data,"COMFORT");
////////////////// for file /////////////////
FILE *fp=fopen("testdata.txt","r");
if(!fp){
cout<<"Can not open file "<<endl;
return -1;
}
////////////////////////////////////
int Index=0;//////////column
char buffer[100]="";
char Temp[15]="";
while(!feof(fp))
{
fgets(buffer,90,fp);//read a line of file
NumOfExample++;///// total of lines
switch(buffer[strlen(buffer)-2])
{
case 'A':NumA++;break;///// Attribute of this line
case 'S':NumS++;break;
case 'I':NumI++;break;
}
/////////////////////////////////////////////////////////
int pos=0;
Index=0;////////指示COLUM N的个数
while( pos<strlen(buffer)-3)//// error
{
pos=GetWord(buffer,',',pos,Temp);
// cout<<Temp<<" ";
Index++;
ptr=&AttrList[Index-1];
// bool flag=false;
while(strcmp(ptr->data,Temp)!=0 && ptr->next)
ptr=ptr->next;
if(strcmp(Temp,ptr->data)!=0)////找到了已经存在的项目
{//到最后没有相等的,则要新建一个节点
pAttr=new AttrNode;
strcpy(pAttr->data,""); //////initial new node
pAttr->count=0;
pAttr->KindA=0;
pAttr->KindS=0;
pAttr->next=NULL;
pAttr->KindI=0;
strcpy(pAttr->data,Temp);
pAttr->next=NULL;
ptr->next=pAttr ;
pAttr->count++;
switch(buffer[strlen(buffer)-2])
{
case 'A':pAttr->KindA+=1;break;
case 'S':pAttr->KindS+=1;break;
case 'I':pAttr->KindI+=1;break;
}
}/////////if
else
{
ptr->count+=1;
switch(buffer[strlen(buffer)-2])
{
case 'A':ptr->KindA+=1;break;
case 'S':ptr->KindS+=1;break;
case 'I':ptr->KindI+=1;break;
}
}
}/// while while( pos<strlen(buffer))///处理缓冲区
for(int i=0;i<strlen(buffer);i++)
{
buffer[i]='\0';
}
}//while while(!feof(fp))
fclose(fp);
/* ///////////////////////////////////////////////////////////////
////对统计的结果计算
cout<<endl<<NumOfExample<<endl;
cout<<"NUm Of KindA: "<<NumA<<endl;
cout<<"Num of KindS: "<<NumS<<endl;
cout<<"Num of KindI: "<<NumI<<endl; //////////// test /////////////
ptr=&AttrList[0];
cout<<ptr->next->count<<endl;
cout<<ptr->next->KindA <<endl;
///////////////////////////////////////////////////////////////// */
for( n=0;n<8;n++)
{
cout<<"The "<<n+1<<" th Attribute of list "<<endl;
ptr=&AttrList[n];
ptr=ptr->next;
while(ptr)
{
cout<<ptr->data<<" ";
cout<<"count:"<<ptr->count<<" ";
cout<<"KindA: "<<ptr->KindA<<" "; /////////// test
cout<<"KindS: "<<ptr->KindS<<" ";
cout<<"KindI: "<<ptr->KindI<<" ";
cout<<endl;
ptr=ptr->next;
}
cout<<endl<<endl;
}
///////////////////////// compute /////////////////////
double H,h,Gain,sum;
H=(double(NumI)/NumOfExample )*log(double(NumOfExample)/NumI)+\
(double(NumA)/NumOfExample )*log(double(NumOfExample)/NumA)+\
(double(NumS)/NumOfExample )*log(double(NumOfExample)/NumS);
H=H/log(2);
/*
cout<<"quot is :"<<H<<" bit";/////////// quot of info
cout<<endl<<endl;
//////////////////////////// test error ===>true */
///////////////////////////////////////////////////////////////////
Node NodeArray[8];
double Total=0.0;
for(n=0;n<8;n++)
{
ptr=&AttrList[n];
ptr=ptr->next;
h=0.0;
sum=0;
while(ptr)
{
if(ptr->KindA>0)
h=(double(ptr->KindA)/ptr->count)*log(double(ptr->count)/ptr->KindA);
if(ptr->KindS>0)
h+=(double(ptr->KindS)/ptr->count)*log(double(ptr->count)/ptr->KindS);
if(ptr->KindI>0)
h+=(double(ptr->KindI)/ptr->count)*log(double(ptr->count)/ptr->KindI);
h*=double(ptr->count)/NumOfExample;
sum=sum+h;
ptr=ptr->next;
}//////////条件商
Gain=H-sum/log(2);
strcpy(NodeArray[n].Name,AttrList[n].data);///// to a array
NodeArray[n].quot=Gain;
Total=Total+Gain;
cout<<"Gain of attribute :"<<AttrList[n].data<<" is :"<<Gain<<" bit."<<endl;
}
/* for(n=0;n<8;n++) ////test
{
cout<< NodeArray[n].Name<<" is: "<<NodeArray[n].quot<<" bit"<<endl;
}
cout<<endl; */
////////////////////// sort ///////////////////////////
double t;
char tchar[10];
cout<<"********************* The folowing is the sorted result **********************";
cout<<endl;
cout<<endl;
cout<<"信息商: "<<H<<endl;
cout<<"排序后的条件商: "<<endl;
for(int i=1;i<8;i++)
for(int j=0;j<i;j++)
{
if(NodeArray[i].quot>NodeArray[j].quot)
{
t=NodeArray[i].quot;
strcpy(tchar, NodeArray[i].Name);
NodeArray[i].quot=NodeArray[j].quot;
strcpy( NodeArray[i].Name, NodeArray[j].Name);
NodeArray[j].quot=t;//////////////// sort quot
strcpy( NodeArray[j].Name,tchar);
}
}
for(i=0;i<8;i++)
cout<<"Attribute :"<< setw(15)<<NodeArray[i].Name<< " "<<"quote: "<<NodeArray[i].quot<<" bit"<<" Weight is :"<<NodeArray[i].quot/Total<<endl;
cout<<endl;
cout<<"************************************ End ***************************************";
////////////////// output file //////////////
FILE *fpOut=fopen ("output.txt","w" );
if(!fpOut)
{
cout<<"Can not open output file"<<endl;
exit(1);
}
for(i=0;i<8;i++)
{
fprintf(fpOut,"%-20s : ",NodeArray[i].Name );
fprintf(fpOut, "%lf ",NodeArray[i].quot);
fprintf(fpOut,"\n");
}
fclose(fpOut);
return 0;
}
// Resets the node `p` points to: fills `data` with a 14-character "0"
// placeholder, zeroes all tallies and detaches the node from any list.
// `p` must point to a valid AttrNode.
void InitNode(AttrNode *p)
{
    strcpy(p->data,"00000000000000");  // 14 characters + NUL exactly fill data[15]
    p->KindA=0;
    p->KindS=0;
    p->KindI=0;
    p->count=0;
    // The list walks in this file stop at a NULL `next`, so an initialised
    // node must not keep an indeterminate link.
    p->next=NULL;
}//////////////////////
// Extracts one delimiter-separated word from a line.
//
// Copies the characters of `buf` starting at `index` into `RetChar` until the
// delimiter `lim` is met or the position reaches strlen(buf)-2 (the last two
// characters of the line are never part of a word), then NUL-terminates
// `RetChar`.
//
// buf      NUL-terminated line to read from
// lim      delimiter character
// index    position in `buf` where the word starts
// RetChar  receives the word; the caller must provide room for the longest
//          possible word plus the terminating NUL
//
// Returns the index just past the position where copying stopped, i.e. the
// start of the next word when the stop was caused by a delimiter.
int GetWord(char *buf ,char lim,int index,char *RetChar)
{
    // Measure the line once, and compute the limit in signed arithmetic:
    // `strlen(buf)-2` evaluated as size_t wraps around for lines shorter than
    // two characters and would let the loop run past the end of `buf`.
    const size_t len = strlen(buf);
    const int limit = (len >= 2) ? static_cast<int>(len - 2) : 0;

    int i = index;
    int j = 0;
    // A negative start index copies nothing.
    while (i >= 0 && i < limit && buf[i] != lim)
    {
        RetChar[j] = buf[i];
        i++;
        j++;
    }
    RetChar[j] = '\0';
    return i + 1;
}
/*
信息商: 0.940613
排序后的条件商:
Attribute : CORE_STBL quote: 0.0416893 bit Weight is :0.207891
Attribute : COMFORT quote: 0.0327474 bit Weight is :0.163301
Attribute : BP_STBL quote: 0.0325222 bit Weight is :0.162178
Attribute : L_BP quote: 0.0300606 bit Weight is :0.149903
Attribute : L_SURF quote: 0.0293591 bit Weight is :0.146404
Attribute : SURF_STBL quote: 0.0118933 bit Weight is :0.0593081
Attribute : L_O2 quote: 0.0114602 bit Weight is :0.0571484
Attribute : L_CORE quote: 0.010802 bit Weight is :0.0538662
*/
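/*
Keyboard shortcuts (this list appears to be residue of the web code viewer
the file was copied from; it is not part of the program):
  Copy code            Ctrl + C
  Search code          Ctrl + F
  Full-screen mode     F11
  Toggle theme         Ctrl + Shift + D
  Show shortcuts       ?
  Increase font size   Ctrl + =
  Decrease font size   Ctrl + -
*/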