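// File: main.cpp
//
// Project: Naive Bayes classifier (朴素贝叶斯分类器)
// Language: C++
// (The "字号:" font-size label that followed here was code-viewer page residue.)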
#include<stdio.h>
#include<string.h>
#include<stdlib.h>
#include<math.h>
#include<algorithm>
using namespace std;

// Program-wide constants. They are typed constants rather than object-like
// macros so that they obey scoping rules: a macro named `e` (or `N`, `L`, ...)
// would textually replace every later identifier with that spelling.
const int MIN = -1000000000;        // NOTE(review): not referenced by the visible code
// MAX (10000000000000000000) was already commented out in the original source.
const int N = 18000;                // row capacity of trainset
const int M = 6000;                 // number of test rows
const int L = 9;                    // fields per record; the last one is the class label
const double PI = 3.141592653589;
const double e = 2.7182818284590;   // base of the natural logarithm
const int K = 250;                  // capacity of the per-attribute statistics arrays

double trainset[N][L],test[M][L];   // training rows kept for the model / rows to be scored
double AVG[K],D[K];                 // per-attribute mean and standard deviation

// One scored test sample.
struct T{
// Row index of the sample in the test array.
int i;
// Score assigned to that row by prediction(); cmp() orders on this field.
double predict;
};
// One entry per test row; filled by prediction(), then sorted and printed.
T result[M];

// Number of training rows kept for the model. Incremented while reading the
// training file for every record whose class label is not "2G".
int cn=0;
/*读取训练集中的各个实体
选择3G的样本作为预测模型
将所有的类型转换为double型，统一处理
*/
void read_train_instance()
{
//	double a,b;
	char tmp[3000];
	char *token;
    char seps[]   = ",";
    int i=0,j=0;
	freopen("trainee.data","r",stdin);
    
	for(i=0;i<N;i++)
	{
      gets(tmp);
	 token = strtok( tmp, seps );
	 j=0;
                 while( token != NULL )
				 {
					 
        		  	   if(j==L-1)//类属性填值
					   if(strcmp(token,"2G")==0)
					   trainset[i][j]=0,i--;
					   else
					   trainset[i][j]=1,cn++;
				  else
				   trainset[i][j]=atof(token);				  
                   token = strtok( NULL, seps );
				   j++;			 	
				 }	
				
				
	}
		/*freopen("traineeout.data","w",stdout);
		for(i=0;i<cn;i++)
		{  
		
			  for(j=0;j<L;j++)
				printf("%lf ",trainset[i][j]);
			   printf("\n");
		
		}
		printf("%d\n",cn);
		//fclose(stdout);*/
	fclose(stdin);	
	
}
/*
读去测试集的类标记未知的样本，相对应转化为double型，为预测阶段做准备
*/
void read_test_instance()
{

	char tmp[3000];
	char *token;
    char seps[]   = ",";
    int i=0,j=0;
	freopen("test.data","r",stdin);
    
	for(i=0;i<M;i++)
	{
      gets(tmp);
	 token = strtok( tmp, seps );
	 j=0;
                 while( token != NULL )
				 {					         		   		
				   test[i][j]=atof(token);				  
                   token = strtok( NULL, seps );
				   j++;			 	
				 }				           
				
	}
	/*	freopen("testout.data","w",stdout);
		for(i=0;i<M;i++)
		{
			for(j=0;j<L-1;j++)
				printf("%lf ",test[i][j]);
			printf("\n");
		}
	fclose(stdin);*/
	fclose(stdin);
}
/*
 * Likelihood estimate for a discrete attribute.
 *
 * Returns the fraction of the cn training rows whose value in column j is
 * exactly equal to the value of test row i in that column.
 */
double Deal_discrete(int i,int j)
{
	const double wanted = test[i][j];
	int matches = 0;
	int row = 0;

	// Count the training samples that carry the same value for this attribute.
	while (row < cn)
	{
		if (trainset[row][j] == wanted)
			++matches;
		++row;
	}

	return static_cast<double>(matches) / cn;
}
/*
 * Likelihood of a continuous attribute, using the Gaussian density function.
 *
 * Evaluates the normal density with mean AVG[j] and standard deviation D[j]
 * at the value of test row i in column j.
 *
 * If D[j] is not a positive number (e.g. every training value in the column
 * was identical) the density is undefined. The column is then treated as a
 * point mass: 1 when the test value equals the mean, 0 otherwise. This is
 * what the frequency estimate for a discrete column with a single distinct
 * value would give, and it keeps NaN out of the scores.
 */
double Deal_continuous(int i,int j)
{
	const double sigma = D[j];
	const double diff = test[i][j] - AVG[j];

	if (!(sigma > 0.0))                 // also catches NaN
		return diff == 0.0 ? 1.0 : 0.0;

	const double exponent = (diff * diff) / (2 * sigma * sigma);
	// exp() is used rather than pow() on a truncated constant for e.
	return exp(-exponent) / (sqrt(2 * PI) * sigma);
}
void prediction()
{    
//printf("hello");
   double xx,precision;
   int i,j;

   for(i=0;i<8;i++)
	   AVG[i]=D[i]=0.0;

   
   for(j=0;j<8;j++)//对连续属性的列求平均值，标准差
	   if(j!=0&&j!=2&&j!=8)
	   {
		   xx=0;
         for(i=0;i<cn;i++)
		 {
			 AVG[j]+=trainset[i][j];//计算出总和
			 xx+=trainset[i][j]*trainset[i][j];//计算出平方和
		 }
		 AVG[j]/=cn;//求J列的平均值
		 double tp=(xx-cn*AVG[j]*AVG[j]);
		 D[j]=sqrt((xx-cn*AVG[j]*AVG[j])/(cn-1));//求J列的标准差
	   }


	 
 for(i=0;i<M;i++)
 {
	 precision=10e50;
	 for(j=0;j<8;j++)
	 {
		 if(j==0||j==2)
			 precision*=Deal_discrete(i,j);
		 else precision*=Deal_continuous(i,j);
	 }
	 result[i].i=i;
	 result[i].predict=precision;
 }

}
/*
 * Ordering predicate for sort(): true when a has a strictly higher score
 * than b, which sorts the results in descending order of predict.
 */
bool cmp(const T &a,const T &b)
{
	return b.predict < a.predict;
}
void print_result()
{
	int i;
	freopen("Output.txt","w",stdout);
	for(i=0;i<2000;i++)
		printf("%d %lf\n",result[i].i,result[i].predict);
	fclose(stdout);
}
/*
 * Program entry point: loads the training and test data, scores every test
 * row, sorts the rows by descending score and writes the top entries.
 *
 * main must return int in C++; `void main()` is rejected by conforming
 * compilers.
 */
int main()
{
	read_train_instance();
	read_test_instance();
	prediction();
	sort(result, result + M, cmp);
	print_result();
	return 0;
}
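// Keyboard shortcuts of the web code viewer (page residue, not part of the program):
//
//   Copy code          Ctrl + C
//   Search code        Ctrl + F
//   Full-screen mode   F11
//   Switch theme       Ctrl + Shift + D
//   Show shortcuts     ?
//   Increase font size Ctrl + =
//   Decrease font size Ctrl + -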