⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bigtree.cpp

📁 该代码是数据挖掘里面的决策树算法 利用ID3理论
💻 CPP
📖 第 1 页 / 共 2 页
字号:
// a.cpp : Defines the entry point for the console application.
//

#include "stdafx.h"
#include <iostream.h>
#include <fstream.h>
#include <stdlib.h>//暂停程序
#include <math.h>
#include <string.h>
//abs()绝对值 ln(x)求对数, exp(x)指数,sin(x),cos(x)三角函数,rand()随机函数
//floor(x)取整函数(<=x),ceil(x)取整函数(>=x),sqrt(x)开平方,power(x,y) 求x的y次方
//log(a)/log(2)求以2为底的对数值

static int nodeLay=0;//显示树结构时用
int doIndex=1;//确定最终被测试属性的位置
static char fileName[20]="data_1.txt",fileName2[20]="data_2.txt";
static int Right=0,Wrong=0,Total=0;//计算数据
//static char *pNAME[5]={"恒温?","有羽毛?","有皮?","能游泳?","会下蛋?"};
static char *pNAME[23]={"Classes","CapShape","CapSurface","CapColor","Bruises","Odor","GillAttachment","GillSpacing"
,"GillSize","GillColor","StalkShape","StalkRoot","StalkSurfaceAboveRing","StalkSurfaceBelowRing"
,"StalkColorAboveRing","StalkColorBelowRing","VeilType","VeilColor","RingNumber","RingType","SporePrintColor","Population","Habitat"};


struct Attribute
{
	char Value;//不能用指针表示
	char *Name;
	bool usedTag;
	Attribute *Next;
};

struct Point
{
	Attribute Value;
	Point *Next;
};

struct Amount
{
	int Value;//属性数量
	char aValue;//属性值
	Amount *Next;
};

struct ClassLink//分类链表
{
	int Total;//该类总数
	char aValue;//属性值
	ClassLink *Brother;//保存子链表数据
	ClassLink *Next;//保存父链表数据
};

struct Data//保存数量链表
{
	float Value;
	Data *Next;
};

struct Node
{
	char *Name;//属性名
	char Value;//属性值
	Node *Next,*Brother;//第一个节点及兄弟节点
};

float Calculate(Data *dHead,int Total)
{
	Data *dRear=new Data;
	float Result=0.0,temp=0.0;
	dRear=dHead;
	
	while(dRear->Next!=NULL)
	{
		dRear=dRear->Next;
		temp=(float)dRear->Value/Total;//注意括号包含的位置
		Result+=-temp*((float)log(temp)/log(2));
	};
	return Result;
};


void OutLine(Point *pNow)
{
	Point *pt=new Point;
	Attribute *at=new Attribute;
	pt=pNow;
	at=&pt->Value;
	while(at->Next!=NULL)
	{
		at=at->Next;
		cout<<at->Value;
		if(at->usedTag)
			cout<<"√ ";//被使用标记
		cout<<"  ";
	};
	cout<<endl;
};

void Out(Point *pHead)
{
	Point *pRear=new Point;
	pRear=pHead;
	int i=0;
	while(pRear->Next!=NULL)
	{
		Attribute *aRear=new Attribute;
		pRear=pRear->Next;
		aRear=&pRear->Value;
		cout<<" Point_"<<i++<<":\t";
		while(aRear->Next!=NULL)
		{
			aRear=aRear->Next;
			cout<<aRear->Value;
			if(aRear->usedTag)
				cout<<"√ ";//被使用标记
		//	else
				//cout<<"× ";
			cout<<" ";
		};
		cout<<endl;
	};
};

Node *FormTree(Node *nHead,Point *pHead,char *aName)
{
	Point *pRear=new Point;
	Amount *aAttrHead=new Amount[sizeof(pNAME)/4],*aAttrRear=new Amount[sizeof(pNAME)/4];
	int aNum=0,nFind=0,nLost=0,totalPoint=0,keyCheck=0,classNum[sizeof(pNAME)/4],iTemp=0;
	float Entropy[sizeof(pNAME)/4],checkValue=0.0,maxValue=0.0;
	bool isFind=false;

	for(aNum=0;aNum<sizeof(pNAME)/4;aNum++)
	{
		aAttrHead[aNum].Next=NULL;
		aAttrRear[aNum]=aAttrHead[aNum];
		Entropy[aNum]=0.0;//每个属性值的平均信息量
		classNum[aNum]=0;
	};

	pRear=pHead;

	bool checkAll=true;//检查是否所有属性都被使用过
		
	Attribute *aRear=new Attribute;
	if(pRear->Next!=NULL)
	{
		aRear=&pRear->Next->Value;
		while(aRear->Next!=NULL)
		{
			aRear=aRear->Next;
			if(aRear->Name==aName)//目标属性除外
				continue;
			if(!aRear->usedTag)
			{
				checkAll=false;
				break;
			}
		};
	};
	if(checkAll)
	{
		nHead->Next=NULL;
		return NULL;//如果目标属性除外的所有属性均被使用过,退出计算
	};

	pRear=pHead;
	//得出每个属性的分类个数
	//cout<<"计算第个属性的分类个数,请稍候.............."<<endl;
	while(pRear->Next!=NULL)
	{
		Attribute *aRear=new Attribute;
		pRear=pRear->Next;
		aRear=&pRear->Value;
		aNum=0;
		while(aRear->Next!=NULL)
		{
			aRear=aRear->Next;
			Amount *amTemp=new Amount;
			amTemp=&aAttrRear[aNum];
			isFind=false;
			while(amTemp->Next!=NULL)
			{
				amTemp=amTemp->Next;
				if(amTemp->aValue==aRear->Value)
				{
					amTemp->Value++;
					isFind=true;
					nLost++;
					break;
				}				
			};
			if(!isFind)
			{
				Amount *temp=new Amount;
				temp->aValue=aRear->Value;
				temp->Value=1;
				temp->Next=NULL;
				amTemp->Next=temp;
				amTemp=temp;
				classNum[aNum]++;
				nFind++;
			}
			if(aRear->Name==aName)
			{
				keyCheck=aNum;//选定要计算的属性值索引
			}
			aNum++;
		};
		totalPoint++;
	};

	if(classNum[keyCheck]==1)//目标属性只有一个分类
	{
		nHead->Next=NULL;
		return NULL;
	};

	//生成类数量链表
	//cout<<"正在生成类数量链表,请稍候.............."<<endl;
	ClassLink *clHead=new ClassLink[sizeof(pNAME)/4];
	for(aNum=0;aNum<sizeof(pNAME)/4;aNum++)
	{
		clHead[aNum].Next=NULL;
		Amount *amTemp=new Amount;
		amTemp=&aAttrRear[aNum];
		ClassLink *clTemp=new ClassLink;
		clTemp=&clHead[aNum];
		while(amTemp->Next!=NULL)
		{
			amTemp=amTemp->Next;
			ClassLink *temp=new ClassLink;
			temp->aValue=amTemp->aValue;
			temp->Total=amTemp->Value;
			temp->Next=NULL;
			temp->Brother=NULL;
			clTemp->Next=temp;
			clTemp=temp;
			//cout<<"属性:"<<temp->aValue<<" 值: "<<temp->Total<<"\t";
		}
		//cout<<classNum[aNum]<<endl;
		//cout<<endl;
	};

	//计算类与类的值数量关系
	//cout<<"计算参考类与目标类的值数量关系,请稍候.............."<<endl;
	pRear=pHead;
	while(pRear->Next!=NULL)
	{
		Attribute *aRear=new Attribute;
		Attribute *aTemp=new Attribute;
		pRear=pRear->Next;
		aRear=&pRear->Value;
		while(aRear->Next!=NULL)
		{
			aRear=aRear->Next;
			if(aRear->Name==aName)
			{
				aTemp=aRear;
				break;
			};
		};
		//cout<<aTemp->Name<<"="<<aTemp->Value<<endl;
		
		aRear=&pRear->Value;
		aNum=0;
		while(aRear->Next!=NULL)
		{
			aRear=aRear->Next;
			ClassLink *clTemp=new ClassLink;
			clTemp=&clHead[aNum];
			while(clTemp->Next!=NULL)
			{//在链表中找到该属性值的位置
				clTemp=clTemp->Next;
				//cout<<clTemp->aValue<<"="<<clTemp->Total<<"\t";
				if(aRear->Value==clTemp->aValue)
				{
					//cout<<aRear->Value<<"="<<clTemp->aValue<<endl;
					break;
				};
			};
			//cout<<endl;
			if(clTemp!=NULL)
			{
				ClassLink  *clbTemp=new ClassLink;
				clbTemp=clTemp;
				bool isFind=false;
				while(clbTemp->Brother!=NULL)
				{
					clbTemp=clbTemp->Brother;
					if(clbTemp->aValue==aTemp->Value)
					{
						isFind=true;
						clbTemp->Total++;
						break;
					}
				};
				if(!isFind)
				{
					ClassLink *temp=new ClassLink;
					temp->aValue=aTemp->Value;
					temp->Brother=NULL;
					temp->Next=NULL;
					temp->Total=1;
					clbTemp->Brother=temp;
					clbTemp=temp;
					//cout<<temp->aValue<<"="<<temp->Total<<"\t";
				};
			};
			aNum++;
		}
	};

	for(aNum=0;aNum<sizeof(pNAME)/4;aNum++)
	{
		ClassLink *clTemp=new ClassLink;
		clTemp=&clHead[aNum];
		while(clTemp->Next!=NULL)
		{
			clTemp=clTemp->Next;
			//cout<<"属性:"<<clTemp->aValue<<" 值: "<<clTemp->Total<<"\t"<<endl;
			ClassLink *clbTemp=new ClassLink;
			clbTemp=clTemp;
			while(clbTemp->Brother!=NULL)
			{
				clbTemp=clbTemp->Brother;
				//cout<<"  A="<<clbTemp->aValue<<"\tV="<<clbTemp->Total<<"\t";
			};
			//cout<<endl;
		};
		//cout<<"--------------------------------------------------------"<<endl;
	};
	
	//分别计算每个属性的熵
	//cout<<"计算每个属性的熵,请稍候.............."<<endl;
	Data *dlHead=new Data[sizeof(pNAME)/4],*dlRear=new Data[sizeof(pNAME)/4];
	for(aNum=0;aNum<sizeof(pNAME)/4;aNum++)
	{
		dlHead[sizeof(pNAME)/4].Next=NULL;
		dlRear[sizeof(pNAME)/4]=dlHead[sizeof(pNAME)/4];
	};

	for(aNum=0;aNum<sizeof(pNAME)/4;aNum++)
	{
		ClassLink *clTemp=new ClassLink;
		Data *dlhTemp=new Data;
		dlhTemp=&dlRear[aNum];
		clTemp=&clHead[aNum];
		float Result=0.0;
		while(clTemp->Next!=NULL)
		{
			clTemp=clTemp->Next;
			ClassLink *clbTemp=new ClassLink;
			Data *dHead=new Data,*dRear=new Data;
			dHead->Next=NULL;
			dRear=dHead;
			clbTemp=clTemp;
			while(clbTemp->Brother!=NULL)
			{
				clbTemp=clbTemp->Brother;
				Data *dTemp=new Data;
				dTemp->Value=(float)clbTemp->Total;
				dTemp->Next=NULL;
				dRear->Next=dTemp;
				dRear=dTemp;
			};
			dRear=dHead;
			Data *dlTemp=new Data;
			dlTemp->Next=NULL;
			dlTemp->Value=Calculate(dRear,clTemp->Total);
			dlhTemp->Next=dlTemp;
			dlhTemp=dlTemp;
			//cout<<"总熵:"<<dlTemp->Value<<"\t";
			if(keyCheck==aNum)
			{
				float t=(float)clTemp->Total/totalPoint;
				Result+=-t*(log(t)/log(2));
				checkValue=Result;
			};
		};
		//cout<<endl;
	};

	for(aNum=0;aNum<sizeof(pNAME)/4;aNum++)
	{
		if(keyCheck==aNum)
			continue;
		ClassLink *clTemp=new ClassLink;
		Data *dlhTemp=new Data;
		dlhTemp=&dlRear[aNum];
		clTemp=&clHead[aNum];
		Entropy[aNum]=0.0;
		float Result=0.0;
		while(clTemp->Next!=NULL)
		{
			clTemp=clTemp->Next;
			dlhTemp=dlhTemp->Next;
			//cout<<"("<<clTemp->Total<<"/"<<totalPoint<<")*"<<dlhTemp->Value<<" + ";
			Result+=((float)clTemp->Total/totalPoint)*dlhTemp->Value;
		};
		//cout<<"="<<Result<<endl;
		//cout<<aNum<<":  "<<Result<<endl;
		Entropy[aNum]=checkValue-Result;
	};
	

	/*for(aNum=0;aNum<sizeof(pNAME)/4;aNum++)
	{
		cout<<"索引"<<aNum<<": "<<Entropy[aNum]<<endl;
	};*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -