⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 kddcup.c

📁 这是一个用遗传算法对KDDCUP数据集
💻 C
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <math.h>

// Number of individuals; used as the length of population[] and as the
// bound of every loop that walks the population.
#define POPSIZE 5000
// Number of generations the evolution loop in main() runs.
#define GEN   100   
#define ATTRIB 8   // 8 basic attributes

// Global variables
int gen=0; // current generation counter, advanced by main()
double pc=0.90; // presumably the crossover probability -- TODO confirm against cross()
double pm=0.10; // presumably the mutation probability -- TODO confirm against mutation()
int normalcount; // total number of normal-pattern records (set by readfiles())
int attackcount; // total number of attack-pattern records (set by readfiles())
int bestindex; // presumably the index of the best individual -- TODO confirm against fbwest()
int worstindex; // presumably the index of the worst individual -- TODO confirm against fbwest()


/*
 * Capacity (including the terminating NUL) of every text field in
 * struct kddcup_data.  The previous size of 10 could not hold service
 * names this program itself lists (e.g. "netbios_dgm", 11 characters) or
 * KDD Cup labels such as "buffer_overflow", so copying them overflowed
 * the field.
 */
enum { KDD_TEXT_LEN = 32 };

/*
 * One KDD Cup connection record.
 *
 * The members appear in the same order in which readfiles() parses them
 * from a comma-separated line: one integer, three text fields, twenty
 * integers, a mix of rates and counts, and finally the record label.
 */
struct kddcup_data {
	/* Leading basic attributes. */
	int   duration;
	char  protocol_type[KDD_TEXT_LEN];
	char  service[KDD_TEXT_LEN];
	char  flag[KDD_TEXT_LEN];
	int   src_bytes;
	int   dst_bytes;
	int   land;
	int   wrong_fragment;
	int   urgent;

	/* Integer attributes read together with the ones above. */
	int   hot;
	int   num_failed_logins;
	int   logged_in;
	int   num_compromised;
	int   root_shell;
	int   su_attempted;
	int   num_root;
	int   num_file_creations;
	int   num_shells;
	int   num_access_files;
	int   num_outbound_cmds;
	int   is_host_login;
	int   is_guest_login;
	int   count;
	int   srv_count;

	/* Rates are parsed with %f, counts with %d. */
	float serror_rate;
	float srv_serror_rate;
	float rerror_rate;
	float srv_rerror_rate;
	float same_srv_rate;
	float diff_srv_rate;
	float srv_diff_host_rate;
	int   dst_host_count;
	int   dst_host_srv_count;
	float dst_host_same_srv_rate;
	float dst_host_diff_srv_rate;
	float dst_host_same_src_port_rate;
	float dst_host_srv_diff_host_rate;
	float dst_host_serror_rate;
	float dst_host_srv_serror_rate;
	float dst_host_rerror_rate;
	float dst_host_srv_rerror_rate;

	/* Record label: the text between the last comma and the closing '.'. */
	char  idstype[KDD_TEXT_LEN];
};



/*
 * Chromosome: one candidate rule built from the basic attributes of a
 * TCP connection.  A gene value of -1 is a wildcard, i.e. that attribute
 * is skipped when the rule is matched against a record.
 */
struct individual
{
	int    duration;        /* connection duration to match */
	int    protocol_type;   /* 0: tcp, 1: udp, 2: icmp */
	int    service;         /* numeric code of the service */
	int    flag;            /* 5:SF 4:SH 3:S1 2:S0 1:REJ 0:RSTO */
	int    src_bytes;
	int    dst_bytes;
	int    land;
	int    wrong_fragment;

	double fitness;         /* score assigned by countfitness() */
};

// sleep(5000);

// Records loaded by readfiles() from "normal.txt"; the first normalcount entries are valid.
struct kddcup_data kdata[5000]; 
// Records loaded by readfiles() from "attack.txt"; the first attackcount entries are valid.
struct kddcup_data ddata[5000]; 

// Presumably the best/worst individual of the current generation -- TODO confirm against fbwest().
struct individual bestindividual;
struct individual worstindividual;
// Best individual reported by main() after every generation and at the end of the run.
struct individual thebest;
// The whole population, filled in by initpopulation().
struct individual population[POPSIZE];

// Loads "normal.txt" into kdata[] and "attack.txt" into ddata[], setting normalcount/attackcount.
void readfiles();
// String comparison helper; countfitness() treats a non-zero result as "no match".
int stringcmp(char *s1, char *s2);
// Seeds the random generator and fills population[] with random rules.
void initpopulation();
// Matches every individual against the loaded records to compute its fitness.
void countfitness();
// Genetic operators, called in this order once per generation by main().
// Their definitions are outside this part of the file.
void select();
void cross();
void mutation();
// Called after every fitness evaluation; presumably tracks the best and worst
// individuals and preserves the best one -- TODO confirm against its definition.
void fbwest();

/*
 * Program entry point.
 *
 * Loads the data files, builds and scores the initial population, then
 * runs GEN generations of select / cross / mutation, re-scoring the
 * population and refreshing the best/worst bookkeeping after each one.
 * The best fitness is printed after every generation, and the best rule
 * found is printed at the end.
 *
 * Returns 0.  (The previous `void main()` form is not a standard
 * signature for a hosted C program.)
 */
int main(void)
{
	readfiles();

	initpopulation();	/* build the initial population */

	countfitness();

	fbwest();

	for (gen = 0; gen < GEN; gen++) {
		select();
		cross();
		mutation();
		countfitness();
		/* If the parents' best fitness beats the offspring's best, the
		 * parent is copied over the worst offspring. */
		fbwest();
		printf("\n the best fitness is : %f .\n",thebest.fitness);
	}

	printf("\n the the the  best best best fitness is : %f .\n",thebest.fitness);
	printf("\n%d,%d,%d,%d,%d,%d,%d,%d\n",thebest.duration,thebest.protocol_type,thebest.service,thebest.flag,thebest.src_bytes,thebest.dst_bytes,thebest.land,thebest.wrong_fragment);

	return 0;
}



void readfiles()
{
	FILE *fp; 
	char ch;
	int i,j;

	if((fp=fopen("normal.txt","r"))==NULL) 
		printf("can not open file\n"); 
	i=0;
	while(!feof(fp)) //文件输入到KDDCUP数据结构中
	{
		fscanf(fp,"%d",&kdata[i].duration);

		fscanf(fp,"%c",&ch); //得到TXT文件中的,号

		fscanf(fp,"%c",&ch); //得到TXT文件中的,和,间的字符串
		j=0;
	while (ch!=','&&ch!='\n') 
	{
		kdata[i].protocol_type[j]=ch;
		j++;
		fscanf(fp,"%c",&ch);
	}
		kdata[i].protocol_type[j]='\0'; //写入字符串结束标志


	fscanf(fp,"%c",&ch); //作用同上
	j=0;
	while (ch!=',') 
	{
		kdata[i].service[j]=ch;
		j++;
		fscanf(fp,"%c",&ch);
	//	printf("%c\n",ch);
	}
	kdata[i].service[j]='\0';

	fscanf(fp,"%c",&ch); //作用同上
	j=0;
	while (ch!=',') 
	{
		kdata[i].flag[j]=ch;
		j++;
		fscanf(fp,"%c",&ch);

	}
	kdata[i].flag[j]='\0';


		fscanf(fp,"%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,",&kdata[i].src_bytes,&kdata[i].dst_bytes,&kdata[i].land,&kdata[i].wrong_fragment,&kdata[i].urgent,&kdata[i].hot,&kdata[i].num_failed_logins,&kdata[i].logged_in,&kdata[i].num_compromised,&kdata[i].root_shell,&kdata[i].su_attempted,&kdata[i].num_root,&kdata[i].num_file_creations,&kdata[i].num_shells,&kdata[i].num_access_files,&kdata[i].num_outbound_cmds,&kdata[i].is_host_login,&kdata[i].is_guest_login,&kdata[i].count,&kdata[i].srv_count); //作用同上


		fscanf(fp,"%f,%f,%f,%f,%f,%f,%f,%d,%d,%f,%f,%f,%f,%f,%f,%f,%f,",&kdata[i].serror_rate,&kdata[i].srv_serror_rate,&kdata[i].rerror_rate,&kdata[i].srv_rerror_rate,&kdata[i].same_srv_rate,&kdata[i].diff_srv_rate,&kdata[i].srv_diff_host_rate,&kdata[i].dst_host_count,&kdata[i].dst_host_srv_count,&kdata[i].dst_host_same_srv_rate,&kdata[i].dst_host_diff_srv_rate,&kdata[i].dst_host_same_src_port_rate,&kdata[i].dst_host_srv_diff_host_rate,&kdata[i].dst_host_serror_rate,&kdata[i].dst_host_srv_serror_rate,&kdata[i].dst_host_rerror_rate,&kdata[i].dst_host_srv_rerror_rate);


	fscanf(fp,"%c",&ch); //作用得到新字符
	j=0;
	while (ch!='.') 
	{
		kdata[i].idstype[j]=ch;
		j++;
		fscanf(fp,"%c",&ch);

	}
	kdata[i].idstype[j]='\0';

		fscanf(fp,"%c",&ch);


	i++; //统计 KUPDATA的数据量

}

	normalcount=i;//总的NORMAL的KUPDATA的数据量

	fclose(fp); //关闭文件


	
	if((fp=fopen("attack.txt","r"))==NULL) 
		printf("can not open file\n"); 
	i=0;
	while(!feof(fp)) //文件输入到KDDCUP数据结构中
	{
		fscanf(fp,"%d",&ddata[i].duration);

		fscanf(fp,"%c",&ch); //得到TXT文件中的,号

		fscanf(fp,"%c",&ch); //得到TXT文件中的,和,间的字符串
		j=0;
	while (ch!=','&&ch!='\n') 
	{
		ddata[i].protocol_type[j]=ch;
		j++;
		fscanf(fp,"%c",&ch);
	}
		ddata[i].protocol_type[j]='\0'; //写入字符串结束标志


	fscanf(fp,"%c",&ch); //作用同上
	j=0;
	while (ch!=',') 
	{
		ddata[i].service[j]=ch;
		j++;
		fscanf(fp,"%c",&ch);
	//	printf("%c\n",ch);
	}
	ddata[i].service[j]='\0';

	fscanf(fp,"%c",&ch); //作用同上
	j=0;
	while (ch!=',') 
	{
		ddata[i].flag[j]=ch;
		j++;
		fscanf(fp,"%c",&ch);

	}
	ddata[i].flag[j]='\0';


		fscanf(fp,"%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,%d,",&ddata[i].src_bytes,&ddata[i].dst_bytes,&ddata[i].land,&ddata[i].wrong_fragment,&ddata[i].urgent,&ddata[i].hot,&ddata[i].num_failed_logins,&ddata[i].logged_in,&ddata[i].num_compromised,&ddata[i].root_shell,&ddata[i].su_attempted,&ddata[i].num_root,&ddata[i].num_file_creations,&ddata[i].num_shells,&ddata[i].num_access_files,&ddata[i].num_outbound_cmds,&ddata[i].is_host_login,&ddata[i].is_guest_login,&ddata[i].count,&ddata[i].srv_count); //作用同上


		fscanf(fp,"%f,%f,%f,%f,%f,%f,%f,%d,%d,%f,%f,%f,%f,%f,%f,%f,%f,",&ddata[i].serror_rate,&ddata[i].srv_serror_rate,&ddata[i].rerror_rate,&ddata[i].srv_rerror_rate,&ddata[i].same_srv_rate,&ddata[i].diff_srv_rate,&ddata[i].srv_diff_host_rate,&ddata[i].dst_host_count,&ddata[i].dst_host_srv_count,&ddata[i].dst_host_same_srv_rate,&ddata[i].dst_host_diff_srv_rate,&ddata[i].dst_host_same_src_port_rate,&ddata[i].dst_host_srv_diff_host_rate,&ddata[i].dst_host_serror_rate,&ddata[i].dst_host_srv_serror_rate,&ddata[i].dst_host_rerror_rate,&ddata[i].dst_host_srv_rerror_rate);


	fscanf(fp,"%c",&ch); //作用得到新字符
	j=0;
	while (ch!='.') 
	{
		ddata[i].idstype[j]=ch;
		j++;
		fscanf(fp,"%c",&ch);

	}
	ddata[i].idstype[j]='\0';

		fscanf(fp,"%c",&ch);


	i++; //统计 KUPDATA的数据量

}

	attackcount=i;//总的attack的KUPDATA的数据量

	fclose(fp); //关闭文件
	printf("\n%d  %d\n",normalcount,attackcount);
	getchar();
	getchar();
}


void initpopulation() //初始化种群
{
	int i,t;
	srand((unsigned) time(NULL));

	for(i=0;i<POPSIZE;i++)
	{
		if((t=rand()%10)>8) 
		{
			population[i].duration=-1;//通配符 即什么都无所谓 在与KUPDATA匹配中跳过此项
		}
		else if(t<4)
			{
				population[i].duration=0;
			}
			else 
		population[i].duration=rand()%300;// 随机初始化


		if((rand()%10)>8) 
		{
			population[i].protocol_type=-1;
		}
		else 
		{
			population[i].protocol_type=rand()%3;// 每一个数代表一种协议 0:TCP 1:UDP 2:ICMP
		}


		if((rand()%10)>8) 
		{
			population[i].service=-1;
		}
		else 
		{
			population[i].service=rand()%5;// 总结一下常用的52种服务
		}


		if(t=(rand()%10)>8) 
		{
			population[i].flag=-1;
		}

/*		else if (t<4)
		{
			population[i].flag=2; // SF 较为常见 
		}*/
		else 
		{
			population[i].flag=rand()%6; // 总结一下常用的6种 5:SF 4:SH 3:S1 2:S0 1:REJ 0:RSTO
		}
        

		if((rand()%10)>5)   // 0.1 的概率为通配符
		{
			population[i].src_bytes=-1;
		}
		else 
		{
			population[i].src_bytes=rand()%1500;
		}

		if((rand()%10)>5)   //0.1 的概率为通配符
		{
			population[i].dst_bytes=-1;
		}
		else 
		{
			population[i].dst_bytes=rand()%15000;
		}
    

		if((rand()%10)>8)   //0.1 的概率为通配符
		{
			population[i].land=-1;
		}
		else 
		{
			population[i].land=rand()%5;
		}	

		if((rand()%10)>8)   //0.1 的概率为通配符
		{
			population[i].wrong_fragment=-1;
		}
		else 
		{
			population[i].wrong_fragment=rand()%5;
		}	


	}
/*
printf("\n\nthis is population result:");

for(i = 0 ; i < POPSIZE ; i++)
{
	printf("\n%d,%d,%d,%d,%d,%d,%d,%d\n",population[i].duration,population[i].protocol_type,population[i].service,population[i].flag,population[i].src_bytes,population[i].dst_bytes,population[i].land,population[i].wrong_fragment);
}	
*/
}


void 	countfitness()
{
	int i,j,vcount;
	float ap,np; //ap 为此个体与攻击文本匹配概率 ;np为此个体与正常文本匹配概率  fitness=ap-np  范围 【-1,1】
	char tempstr[10];

	for(i=0;i<POPSIZE;i++)
	{
		/*  规则个体对正常文件匹配概率*/
		vcount=0;

		for (j=0;j<normalcount;j++)  // 统计染色体与正常文本normal的匹配个数,匹配概率
		{

             if (population[i].duration!=-1 && population[i].duration!=kdata[j].duration)
				 continue;  //continue控制跳出此次循环;j++后继续 而break则跳出了for循环
			 if (population[i].protocol_type!=-1)
			 {
				 switch(population[i].protocol_type)
				 {
				   case 0 : strcpy(tempstr,"tcp");break;
				   case 1 : strcpy(tempstr,"udp");break;
				   case 2 : strcpy(tempstr,"icmp");break;
				 }
			    if (stringcmp(kdata[j].protocol_type,tempstr)!=0)
					 continue;

			 }

/* 在kddcup中总结出 有这么多SERVICE
http,private,smtp,finger,domain_u,eco_i,ntp_u,auth,ecr_i,telnet,ftp,other,
ftp_data,ssh,ldap,netbios_dgm,netbios_ns,netbios_ssn,imap4,sql_net,Z39_50,bgp,

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -