⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 gainofquote.cpp

📁 在数据挖掘算法中
💻 CPP
字号:
/*********************************************************
//        This program computes the information gain (info-gain)
//        of a training data set. Each record has 8 attributes, delimited by ','.
//        
//   input::  data-set file   Note : originally "copy file to disk of root d:";
//            the code below opens "testdata.txt" relative to the working directory.
//
//   output:: info-gain of the data set
//
//         By Peisen  Yuan
//
//                                   2006-1-3
///////////////////////////////////////////////////////////////////////*/ 

#include "fstream"
#include "iostream"
#include  "cmath"
#include "cstdlib"
#include "iomanip"
#include <algorithm>
#include <cstdio>
#include <cstring>
using namespace std;



/*
 * AttrNode - one element of a singly linked list kept per attribute column.
 *
 * main() uses the array elements themselves as list headers (data holds the
 * attribute name) and hangs one heap-allocated node per distinct attribute
 * value behind each header (data holds the value text).
 */
struct AttrNode {
	char data[15];       // NUL-terminated text, at most 14 characters
	int KindA;           // tally bumped for records whose class label is 'A'
	int KindS;           // tally bumped for records whose class label is 'S'
	int KindI;           // tally bumped for records whose class label is 'I'
	int count;           // total number of records tallied on this node
	AttrNode *next;      // following node in the list, NULL at the tail
};
///////////////
/*
 * Node - one (attribute name, information gain) pair used for ranking.
 *
 * Name is filled from AttrNode::data, so it is sized to match that field
 * (15 bytes); the former 10-byte buffer only just held the longest current
 * attribute name and left no headroom for the strcpy in main().
 */
struct Node {
	char Name[15];   // attribute name, NUL-terminated
	double quot;     // information gain of the attribute, in bits
};
///////////////

int main()
{
	//////////////////////
	

	int NumOfExample =0;
	 int NumA,NumI,NumS;
	 NumA=NumI=NumS=0;

    int  GetWord(char *buf,char dlim,int index,char *RetChar);


	AttrNode AttrList[8];
	AttrNode *pAttr,*ptr;
	for(int n=0;n<8;n++)//////header of list ,initial header
	{
		
		AttrList[n].KindA=AttrList[n].KindS=AttrList[n].KindI=0;
		AttrList[n].next=NULL;
		AttrList[n].count=0;
	}
	strcpy(AttrList[0].data,"L_CORE");
	strcpy(AttrList[1].data,"L_SURF");
	strcpy(AttrList[2].data,"L_O2");
	strcpy(AttrList[3].data,"L_BP");
	strcpy(AttrList[4].data,"SURF_STBL");
	strcpy(AttrList[5].data,"CORE_STBL");
	strcpy(AttrList[6].data,"BP_STBL");
	strcpy(AttrList[7].data,"COMFORT");

	////////////////// for file /////////////////

	FILE   *fp=fopen("testdata.txt","r");
	if(!fp){
		cout<<"Can not open file "<<endl;
		return -1;
	}

	////////////////////////////////////
	int Index=0;//////////column
	char buffer[100]="";
    char Temp[15]="";
	
	while(!feof(fp))
	{
		fgets(buffer,90,fp);//read a line of file 
		NumOfExample++;///// total of lines 
		switch(buffer[strlen(buffer)-2])
		{
		case 'A':NumA++;break;/////  Attribute of this line 
		case 'S':NumS++;break;
		case 'I':NumI++;break;
		}

	/////////////////////////////////////////////////////////
		int pos=0;
	 
		Index=0;////////指示COLUM N的个数
		
		while( pos<strlen(buffer)-3)//// error
		{   
			pos=GetWord(buffer,',',pos,Temp);
		//	cout<<Temp<<"  ";
			Index++;

			ptr=&AttrList[Index-1];
		//	bool flag=false;
			while(strcmp(ptr->data,Temp)!=0 && ptr->next)
					ptr=ptr->next;


				if(strcmp(Temp,ptr->data)!=0)////找到了已经存在的项目
				 
				{//到最后没有相等的,则要新建一个节点
					pAttr=new AttrNode;
					strcpy(pAttr->data,"");					//////initial new node 
					pAttr->count=0;
					pAttr->KindA=0;
					pAttr->KindS=0;
					pAttr->next=NULL;
					pAttr->KindI=0;
					strcpy(pAttr->data,Temp);
					pAttr->next=NULL;
					ptr->next=pAttr ;
					pAttr->count++;
					switch(buffer[strlen(buffer)-2])
					{ 
					case 'A':pAttr->KindA+=1;break;
					case 'S':pAttr->KindS+=1;break;
					case 'I':pAttr->KindI+=1;break;
					}
				}/////////if 
				else
				{
					ptr->count+=1;
					switch(buffer[strlen(buffer)-2])
					{ 
					case 'A':ptr->KindA+=1;break;
					case 'S':ptr->KindS+=1;break;
					case 'I':ptr->KindI+=1;break;
					}
				}
			}/// while   while( pos<strlen(buffer))///处理缓冲区
		for(int i=0;i<strlen(buffer);i++)
		{
			buffer[i]='\0';
		}
		}//while   while(!feof(fp))
	fclose(fp);
	 

  /*     ///////////////////////////////////////////////////////////////
	////对统计的结果计算
 	cout<<endl<<NumOfExample<<endl;
	cout<<"NUm Of KindA: "<<NumA<<endl;
	cout<<"Num of KindS: "<<NumS<<endl;
	cout<<"Num of KindI: "<<NumI<<endl;     ////////////  test   /////////////
    ptr=&AttrList[0];
	cout<<ptr->next->count<<endl;
	cout<<ptr->next->KindA <<endl;
	///////////////////////////////////////////////////////////////// */
 	for( n=0;n<8;n++)
	{
		cout<<"The "<<n+1<<" th Attribute of list  "<<endl;
		ptr=&AttrList[n];
		ptr=ptr->next;
		while(ptr)
		{
			cout<<ptr->data<<"  ";
			cout<<"count:"<<ptr->count<<"  ";
			cout<<"KindA: "<<ptr->KindA<<"  ";   ///////////  test
			cout<<"KindS: "<<ptr->KindS<<"  ";
			cout<<"KindI: "<<ptr->KindI<<"  ";
			cout<<endl;
			ptr=ptr->next;
		}
		cout<<endl<<endl;
	}  
	///////////////////////// compute /////////////////////
    double	H,h,Gain,sum;
	H=(double(NumI)/NumOfExample )*log(double(NumOfExample)/NumI)+\
		(double(NumA)/NumOfExample )*log(double(NumOfExample)/NumA)+\
        (double(NumS)/NumOfExample )*log(double(NumOfExample)/NumS);
	H=H/log(2);
	/*
	cout<<"quot is :"<<H<<" bit";///////////  quot of info 
	cout<<endl<<endl;
////////////////////////////  test error ===>true */

	///////////////////////////////////////////////////////////////////
	Node NodeArray[8];
	double Total=0.0;

	for(n=0;n<8;n++)
	{
		ptr=&AttrList[n];
		ptr=ptr->next;
		h=0.0;
		sum=0;
		
		while(ptr)
		{
			if(ptr->KindA>0)
				h=(double(ptr->KindA)/ptr->count)*log(double(ptr->count)/ptr->KindA);
			if(ptr->KindS>0)
				h+=(double(ptr->KindS)/ptr->count)*log(double(ptr->count)/ptr->KindS);
			if(ptr->KindI>0)
				h+=(double(ptr->KindI)/ptr->count)*log(double(ptr->count)/ptr->KindI);
			h*=double(ptr->count)/NumOfExample;
			sum=sum+h;
			ptr=ptr->next;

		}//////////条件商
		Gain=H-sum/log(2);
		strcpy(NodeArray[n].Name,AttrList[n].data);/////  to a array
		NodeArray[n].quot=Gain;
		Total=Total+Gain;

		cout<<"Gain of attribute :"<<AttrList[n].data<<"  is :"<<Gain<<" bit."<<endl;
	}
/*	for(n=0;n<8;n++)       ////test
	{
		cout<< NodeArray[n].Name<<" is: "<<NodeArray[n].quot<<" bit"<<endl;
	}
	cout<<endl; */
	 //////////////////////    sort   ///////////////////////////
	double t;
	char tchar[10];
	cout<<"********************* The folowing is the sorted result **********************";
	cout<<endl;
	cout<<endl;
	cout<<"信息商: "<<H<<endl;
	cout<<"排序后的条件商: "<<endl;



	for(int i=1;i<8;i++)
		for(int j=0;j<i;j++)
		{
			if(NodeArray[i].quot>NodeArray[j].quot)
			{
				t=NodeArray[i].quot;
				strcpy(tchar, NodeArray[i].Name);

				NodeArray[i].quot=NodeArray[j].quot;
				strcpy( NodeArray[i].Name, NodeArray[j].Name);
				NodeArray[j].quot=t;////////////////      sort quot
				strcpy( NodeArray[j].Name,tchar);

			}
		}
		for(i=0;i<8;i++)
			cout<<"Attribute :"<< setw(15)<<NodeArray[i].Name<< "  "<<"quote: "<<NodeArray[i].quot<<" bit"<<"  Weight is :"<<NodeArray[i].quot/Total<<endl;
		cout<<endl;
		cout<<"************************************ End ***************************************";
		//////////////////  output file //////////////
		FILE *fpOut=fopen ("output.txt","w" );
		if(!fpOut)
		{
			cout<<"Can not open output file"<<endl;
			exit(1);
		}
		
		for(i=0;i<8;i++)
		{ 
				fprintf(fpOut,"%-20s : ",NodeArray[i].Name );
			    fprintf(fpOut, "%lf ",NodeArray[i].quot);
				fprintf(fpOut,"\n");
		}
		fclose(fpOut);
		

	return 0;
}



/*
 * InitNode - puts a node into its blank state.
 *
 *   p   node to reset (must not be NULL)
 *
 * All four tallies are zeroed and data is overwritten with a placeholder of
 * fourteen '0' characters, which exactly fills the 15-byte field including
 * the terminator. The next link is not touched.
 */
void InitNode(AttrNode *p)
{
	AttrNode &node=*p;

	node.count=0;
	node.KindI=0;
	node.KindS=0;
	node.KindA=0;
	strcpy(node.data,"00000000000000");
}


/*
 * GetWord - copies one delimiter-separated field out of a text line.
 *
 *   buf      NUL-terminated line. Scanning never goes beyond the position two
 *            characters before the end of the string, so the last two
 *            characters of buf are never copied.
 *   lim      field delimiter
 *   index    offset in buf at which the field starts (must be >= 0)
 *   RetChar  receives the field, NUL-terminated. No size is passed in, so the
 *            caller must provide room for the longest possible field plus
 *            the terminator.
 *
 * Returns the offset one past the character at which the scan stopped, i.e.
 * the start of the next field when the scan stopped on a delimiter.
 */
int  GetWord(char *buf ,char lim,int index,char *RetChar)
{
	// Computed once and kept signed: as a size_t, strlen(buf)-2 wraps to a huge
	// value for strings shorter than two characters and the scan would run
	// past the end of buf.
	const int stop=int(strlen(buf))-2;

	int i=index;
	int j=0;

	// Bound is tested first so buf[i] is only read inside the scanned range.
	while(i<stop && buf[i]!=lim)
	{
		RetChar[j]=buf[i];
		i++;
		j++;
	}
	RetChar[j]='\0';

	return i+1;
}

/*
信息商: 0.940613
排序后的条件商:
Attribute :      CORE_STBL  quote: 0.0416893 bit  Weight is :0.207891
Attribute :        COMFORT  quote: 0.0327474 bit  Weight is :0.163301
Attribute :        BP_STBL  quote: 0.0325222 bit  Weight is :0.162178
Attribute :           L_BP  quote: 0.0300606 bit  Weight is :0.149903
Attribute :         L_SURF  quote: 0.0293591 bit  Weight is :0.146404
Attribute :      SURF_STBL  quote: 0.0118933 bit  Weight is :0.0593081
Attribute :           L_O2  quote: 0.0114602 bit  Weight is :0.0571484
Attribute :         L_CORE  quote: 0.010802 bit  Weight is :0.0538662
*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -