⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 1.cpp

📁 一个利用KDD1999数据集而完成的改进K-means聚类算法的实现.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
#include <malloc.h>
#include <math.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

#include <iostream>
#include <vector>
using namespace std ;

/**
 * One connection record, laid out in the column order of the input line
 * (see the format string in ReadData).  The numeric members are overwritten
 * with standardized values by Standardize.
 *
 * service and flag are sized for the longest symbolic values in the KDD Cup
 * 1999 data (11 and 6 characters respectively, e.g. "netbios_ssn" and
 * "RSTOS0"); with the previous sizes of 8 and 5 those tokens overflowed into
 * the neighbouring members.
 */
typedef struct recorder
{
	float duration;           // column 1: duration of the connection
	char pro_type[8];         // column 2: protocol type token
	char service[16];         // column 3: service type token
	char flag[8];             // column 4: flag token
	double src_dst_bytes[2];  // columns 5-6: bytes sent by the source / destination host to its peer
	bool land;                // column 7: binary attribute
	float w_u_h_n[4];         // columns 8-11: continuous attributes
	bool log_in;              // column 12: binary attribute
	float num_root[8];        // columns 13-20: continuous attributes
	bool host_guest[2];       // columns 21-22: binary attributes
	float srv_etc[9];         // columns 23-31: continuous attributes
	double dst_host[2];       // columns 32-33: continuous attributes
	float host[8];            // columns 34-41: continuous attributes
	char is_normal[20];       // column 42: class label including its trailing '.', e.g. "normal."
	struct recorder *next;    // links the records that belong to the same cluster
}Recorder;

// Descriptor of one cluster
typedef struct subContainer
{
	Recorder *pFirstRec;       // first record that belongs to this cluster (head of the linked list)
	Recorder *pLastRec;        // last record of the list, kept so that insertion is convenient
	unsigned int recordNum;    // number of records in this cluster
	Recorder clusterCenter;    // centroid of the cluster; has to be updated repeatedly
	char is_normal[20];        // whether the cluster is labelled normal or anomalous
}SubContainer;

typedef vector<Recorder *> RecorderVector;     // container type holding pointers to all records
typedef vector<SubContainer *> ClusterVector;  // container type holding pointers to the clusters

/**
 * @函数名	: ReadData
 * @brief	: 
 * @return	: void 
 * @param	: FILE *fp
 * @param	: Recorder *pRecorder
 * @param	: Recorder &avg_calc
 * @param	: unsigned int &calcNormal
 * @remark	: 读取文件记录
*/
void ReadData(FILE *fp, Recorder *pRecorder, Recorder &avg_calc, unsigned int &calcNormal)
{
	int i = 0;

	fscanf(fp, "%f, %[^,], %[^,], %[^,], %Lf, %Lf, %d, %f, %f, %f, %f, %d, %f, %f, %f, %f, %f, \
		%f, %f, %f, %d, %d, %f, %f, %f, %f, %f, %f, %f, %f, %f, %Lf, %Lf, %f, %f, %f, %f, \
		%f, %f, %f, %f,%[^.].\r\n", 
		&pRecorder->duration, pRecorder->pro_type, pRecorder->service, pRecorder->flag, \
		&pRecorder->src_dst_bytes[0], &pRecorder->src_dst_bytes[1], &pRecorder->land, \
		&pRecorder->w_u_h_n[0], &pRecorder->w_u_h_n[1], &pRecorder->w_u_h_n[2], \
		&pRecorder->w_u_h_n[3], &pRecorder->log_in, &pRecorder->num_root[0], \
		&pRecorder->num_root[1], &pRecorder->num_root[2], &pRecorder->num_root[3], \
		&pRecorder->num_root[4], &pRecorder->num_root[5], &pRecorder->num_root[6], \
		&pRecorder->num_root[7], &pRecorder->host_guest[0], &pRecorder->host_guest[1], \
		&pRecorder->srv_etc[0], &pRecorder->srv_etc[1], &pRecorder->srv_etc[2], \
		&pRecorder->srv_etc[3], &pRecorder->srv_etc[4], &pRecorder->srv_etc[5], \
		&pRecorder->srv_etc[6], &pRecorder->srv_etc[7], &pRecorder->srv_etc[8], \
		&pRecorder->dst_host[0], &pRecorder->dst_host[1], &pRecorder->host[0], \
		&pRecorder->host[1], &pRecorder->host[2], &pRecorder->host[3], &pRecorder->host[4], \
		&pRecorder->host[5], &pRecorder->host[6], &pRecorder->host[7], pRecorder->is_normal);
	strcat(pRecorder->is_normal, ".");

	if (0 == strcmp("normal.", pRecorder->is_normal))
	{
		calcNormal++;
	}

	//所有记录对应属性求和
	avg_calc.duration += pRecorder->duration;
	avg_calc.src_dst_bytes[0] += pRecorder->src_dst_bytes[0];
	avg_calc.src_dst_bytes[1] += pRecorder->src_dst_bytes[1];
	for (i = 0; i < 4; ++i)
	{
		avg_calc.w_u_h_n[i] += pRecorder->w_u_h_n[i];
	}
	for (i = 0; i < 8; ++i)
	{
		avg_calc.num_root[i] += pRecorder->num_root[i];
	}
	for (i = 0; i < 9; ++i)
	{
		avg_calc.srv_etc[i] += pRecorder->srv_etc[i];
	}
	avg_calc.dst_host[0] += pRecorder->dst_host[0];
	avg_calc.dst_host[1] += pRecorder->dst_host[1];
	for (i = 0; i < 8; ++i)
	{
		avg_calc.host[i] += pRecorder->host[i];
	}
}

// Adds (value[k] - mean[k])^2 to sumSq[k] for each of the count attributes.
template <typename T>
static void AddSquaredDeviations(T *sumSq, const T *value, const T *mean, int count)
{
	for (int k = 0; k < count; ++k)
	{
		// Squared by multiplication in double precision rather than through pow().
		const double deviation = value[k] - mean[k];
		sumSq[k] = (T)(sumSq[k] + deviation * deviation);
	}
}

// Replaces each accumulated sum of squares by sqrt(scale * sum).
template <typename T>
static void SquaredSumsToStdDev(T *sumSq, double scale, int count)
{
	for (int k = 0; k < count; ++k)
	{
		sumSq[k] = (T)sqrt(scale * sumSq[k]);
	}
}

// Rewrites value[k] as (value[k] - mean[k]) / stdDev[k]; attributes whose
// standard deviation is zero are left unchanged.
template <typename T>
static void ScaleByStdDev(T *value, const T *mean, const T *stdDev, int count)
{
	for (int k = 0; k < count; ++k)
	{
		if (0 != stdDev[k])
		{
			value[k] = (value[k] - mean[k]) / stdDev[k];
		}
	}
}

/**
 * @function	: Standardize
 * @brief	: Standardizes the continuous attributes of every record in place.
 * @return	: void 
 * @param	: RecorderVector &recorderContainer - records to standardize
 * @param	: Recorder &avg_vector - per-attribute means
 * @param	: Recorder &std_vector - in: accumulators the squared deviations
 *		  are added to; out: per-attribute standard deviations
 * @remark	: Uses the sample standard deviation (divisor n - 1).  The squared
 *		  deviations are added onto whatever std_vector already holds, so
 *		  the caller presumably passes it zeroed - confirm at the call site.
 *		  With fewer than two records the deviation is undefined; the
 *		  function then returns without touching the records or std_vector
 *		  instead of dividing by zero.
*/
void Standardize(RecorderVector &recorderContainer, Recorder &avg_vector, Recorder &std_vector)
{
	const size_t recordCount = recorderContainer.size();
	if (recordCount < 2)
	{
		return;
	}
	const double scale = 1.0 / (recordCount - 1);
	RecorderVector::iterator it;

	// First pass: accumulate the squared deviations of every attribute.
	for (it = recorderContainer.begin(); it != recorderContainer.end(); ++it)
	{
		Recorder *rec = *it;
		AddSquaredDeviations(&std_vector.duration, &rec->duration, &avg_vector.duration, 1);
		AddSquaredDeviations(std_vector.src_dst_bytes, rec->src_dst_bytes, avg_vector.src_dst_bytes, 2);
		AddSquaredDeviations(std_vector.w_u_h_n, rec->w_u_h_n, avg_vector.w_u_h_n, 4);
		AddSquaredDeviations(std_vector.num_root, rec->num_root, avg_vector.num_root, 8);
		AddSquaredDeviations(std_vector.srv_etc, rec->srv_etc, avg_vector.srv_etc, 9);
		AddSquaredDeviations(std_vector.dst_host, rec->dst_host, avg_vector.dst_host, 2);
		AddSquaredDeviations(std_vector.host, rec->host, avg_vector.host, 8);
	}

	// Turn the accumulated sums into the actual standard deviations.
	SquaredSumsToStdDev(&std_vector.duration, scale, 1);
	SquaredSumsToStdDev(std_vector.src_dst_bytes, scale, 2);
	SquaredSumsToStdDev(std_vector.w_u_h_n, scale, 4);
	SquaredSumsToStdDev(std_vector.num_root, scale, 8);
	SquaredSumsToStdDev(std_vector.srv_etc, scale, 9);
	SquaredSumsToStdDev(std_vector.dst_host, scale, 2);
	SquaredSumsToStdDev(std_vector.host, scale, 8);

	// Second pass: standardize every record.
	for (it = recorderContainer.begin(); it != recorderContainer.end(); ++it)
	{
		Recorder *rec = *it;
		ScaleByStdDev(&rec->duration, &avg_vector.duration, &std_vector.duration, 1);
		ScaleByStdDev(rec->src_dst_bytes, avg_vector.src_dst_bytes, std_vector.src_dst_bytes, 2);
		ScaleByStdDev(rec->w_u_h_n, avg_vector.w_u_h_n, std_vector.w_u_h_n, 4);
		ScaleByStdDev(rec->num_root, avg_vector.num_root, std_vector.num_root, 8);
		ScaleByStdDev(rec->srv_etc, avg_vector.srv_etc, std_vector.srv_etc, 9);
		ScaleByStdDev(rec->dst_host, avg_vector.dst_host, std_vector.dst_host, 2);
		ScaleByStdDev(rec->host, avg_vector.host, std_vector.host, 8);
	}
}

// Divides each of the count sums by divisor, in the element type's own precision.
template <typename T>
static void DivideSumsBy(T *sums, int count, int divisor)
{
	for (int k = 0; k < count; ++k)
	{
		sums[k] = sums[k] / (T)divisor;
	}
}

/**
 * @function	: GetAverage
 * @brief	: Converts per-attribute sums into per-attribute means.
 * @return	: void 
 * @param	: Recorder &avg_vector - in: attribute sums; out: attribute means
 * @param	: int num - number of records that were summed
 * @remark	: Only the continuous attributes are divided.  When num is not
 *		  positive there is nothing to average, so avg_vector is left
 *		  unchanged instead of being divided by zero.
*/
void GetAverage(Recorder &avg_vector, int num)
{
	if (num <= 0)
	{
		return;
	}

	DivideSumsBy(&avg_vector.duration, 1, num);
	DivideSumsBy(avg_vector.src_dst_bytes, 2, num);
	DivideSumsBy(avg_vector.w_u_h_n, 4, num);
	DivideSumsBy(avg_vector.num_root, 8, num);
	DivideSumsBy(avg_vector.srv_etc, 9, num);
	DivideSumsBy(avg_vector.dst_host, 2, num);
	DivideSumsBy(avg_vector.host, 8, num);
}

/**
 * @function	: CalcNumDif
 * @brief	: Sum of squared differences between a record and a cluster
 *		  centre over all continuous attributes.
 * @return	: double - the accumulated squared difference (no square root is taken)
 * @param	: Recorder *pRecord - record being compared
 * @param	: Recorder &clusterCenter - centre it is compared against
 * @remark	: Text and binary attributes do not take part in this value.
*/
double CalcNumDif(Recorder *pRecord, Recorder &clusterCenter)
{
	const Recorder &rec = *pRecord;
	const Recorder &ctr = clusterCenter;
	int k;

	// The terms are added in field order, one attribute group after another.
	double total = 0.;
	total += pow(rec.duration - ctr.duration, 2);

	for (k = 0; k < 2; ++k)
	{
		total += pow(rec.src_dst_bytes[k] - ctr.src_dst_bytes[k], 2);
	}
	for (k = 0; k < 4; ++k)
	{
		total += pow(rec.w_u_h_n[k] - ctr.w_u_h_n[k], 2);
	}
	for (k = 0; k < 8; ++k)
	{
		total += pow(rec.num_root[k] - ctr.num_root[k], 2);
	}
	for (k = 0; k < 9; ++k)
	{
		total += pow(rec.srv_etc[k] - ctr.srv_etc[k], 2);
	}
	for (k = 0; k < 2; ++k)
	{
		total += pow(rec.dst_host[k] - ctr.dst_host[k], 2);
	}
	for (k = 0; k < 8; ++k)
	{
		total += pow(rec.host[k] - ctr.host[k], 2);
	}

	return total;
}

/**
 * @function	: CalcFeaDif
 * @brief	: Difference between a record and a cluster over the seven
 *		  categorical attributes.
 * @return	: double - sum over the attributes of (1 - matches / totalRec)^2,
 *		  where matches is the number of list members whose value
 *		  equals the record's value for that attribute
 * @param	: Recorder *pRecord - record being compared
 * @param	: Recorder *pFirstRecord - head of the cluster's record list
 * @param	: unsigned int totalRec - divisor used for the match ratio
 * @remark	: The list is followed through the next pointers until NULL.
*/
double CalcFeaDif(Recorder *pRecord, Recorder *pFirstRecord, unsigned int totalRec)
{
	// Slot of each categorical attribute in the match counters.
	enum
	{
		ATTR_PRO_TYPE,
		ATTR_SERVICE,
		ATTR_FLAG,
		ATTR_LAND,
		ATTR_LOG_IN,
		ATTR_HOST_GUEST_0,
		ATTR_HOST_GUEST_1,
		ATTR_COUNT
	};
	unsigned int matches[ATTR_COUNT] = {0};

	for (const Recorder *member = pFirstRecord; NULL != member; member = member->next)
	{
		matches[ATTR_PRO_TYPE] += (0 == strcmp(pRecord->pro_type, member->pro_type)) ? 1u : 0u;
		matches[ATTR_SERVICE] += (0 == strcmp(pRecord->service, member->service)) ? 1u : 0u;
		matches[ATTR_FLAG] += (0 == strcmp(pRecord->flag, member->flag)) ? 1u : 0u;
		matches[ATTR_LAND] += (pRecord->land == member->land) ? 1u : 0u;
		matches[ATTR_LOG_IN] += (pRecord->log_in == member->log_in) ? 1u : 0u;
		matches[ATTR_HOST_GUEST_0] += (pRecord->host_guest[0] == member->host_guest[0]) ? 1u : 0u;
		matches[ATTR_HOST_GUEST_1] += (pRecord->host_guest[1] == member->host_guest[1]) ? 1u : 0u;
	}

	double total = 0;
	for (int attr = 0; attr < ATTR_COUNT; ++attr)
	{
		// Share of the members that do NOT agree with the record on this attribute.
		const double mismatchShare = 1.0 - matches[attr] / (float)totalRec;
		total += pow(mismatchShare, 2);
	}
	return total;
}

/**
 * @function	: UpdateCluster
 * @brief	: Recomputes a cluster's centre as the mean of its member records.
 * @return	: void 
 * @param	: SubContainer *pRecord - cluster whose centre is refreshed
 * @remark	: Members are reached through the list starting at pFirstRec and
 *		  the sums are divided by recordNum.  Only the continuous
 *		  attributes are averaged; every other member of the new centre
 *		  (text fields, binary fields, label, next) is zero because the
 *		  centre is built from a zero-filled record.
*/
void UpdateCluster(SubContainer *pRecord)
{
	const int memberCount = pRecord->recordNum;
	int k;

	Recorder mean;
	memset(&mean, 0, sizeof(Recorder));

	// Sum every continuous attribute over the cluster's members.
	for (const Recorder *member = pRecord->pFirstRec; NULL != member; member = member->next)
	{
		mean.duration += member->duration;
		for (k = 0; k < 2; ++k)
		{
			mean.src_dst_bytes[k] += member->src_dst_bytes[k];
		}
		for (k = 0; k < 4; ++k)
		{
			mean.w_u_h_n[k] += member->w_u_h_n[k];
		}
		for (k = 0; k < 8; ++k)
		{
			mean.num_root[k] += member->num_root[k];
		}
		for (k = 0; k < 9; ++k)
		{
			mean.srv_etc[k] += member->srv_etc[k];
		}
		for (k = 0; k < 2; ++k)
		{
			mean.dst_host[k] += member->dst_host[k];
		}
		for (k = 0; k < 8; ++k)
		{
			mean.host[k] += member->host[k];
		}
	}

	// Turn the sums into averages.
	mean.duration /= memberCount;
	for (k = 0; k < 2; ++k)
	{
		mean.src_dst_bytes[k] /= memberCount;
	}
	for (k = 0; k < 4; ++k)
	{
		mean.w_u_h_n[k] /= memberCount;
	}
	for (k = 0; k < 8; ++k)
	{
		mean.num_root[k] /= memberCount;
	}
	for (k = 0; k < 9; ++k)
	{
		mean.srv_etc[k] /= memberCount;
	}
	for (k = 0; k < 2; ++k)
	{
		mean.dst_host[k] /= memberCount;
	}
	for (k = 0; k < 8; ++k)
	{
		mean.host[k] /= memberCount;
	}

	pRecord->clusterCenter = mean;
}

/**
 * @function	: SetNewCluster
 * @brief	: Creates a cluster whose only member is pRecord and appends it
 *		  to clusterContainer.
 * @return	: bool - true on success, false when the allocation fails
 * @param	: Recorder *pRecord - record that seeds the cluster
 * @param	: ClusterVector &clusterContainer - receives the new cluster
 * @remark	: The cluster is allocated with malloc and zero-filled.  Its centre
 *		  starts as a complete copy of the seed record, next pointer
 *		  included.
*/
bool SetNewCluster(Recorder *pRecord, ClusterVector &clusterContainer)
{
	SubContainer *cluster = (SubContainer *)malloc(sizeof(SubContainer));
	if (NULL == cluster)
	{
		printf("分配内存错误。\n");
		return false;
	}

	memset(cluster, 0, sizeof(SubContainer));

	// The seed is the whole list and the centre at the same time.
	cluster->clusterCenter = *pRecord;
	cluster->recordNum = 1;
	cluster->pLastRec = pRecord;
	cluster->pFirstRec = pRecord;

	clusterContainer.push_back(cluster);
	return true;
}

/**
 * @function	: OutputInfo
 * @brief	: Prints per-cluster statistics, labels every cluster and prints
 *		  the resulting detection and false-alarm rates.
 * @return	: void 
 * @param	: RecorderVector &recorderContainer - all records (only its size is used)
 * @param	: ClusterVector &clusterContainer - clusters to report on and label
 * @param	: const unsigned int &calcAnormal - divisor of the detection rate
 * @param	: const unsigned int &calcNormal - divisor of the false-alarm rate
 * @remark	: A cluster holding at most 1.5% of all records is labelled
 *		  "anormal.", any other cluster "normal.".  Records of clusters
 *		  labelled "anormal." count as correct detections unless their own
 *		  label is "normal.", in which case they count as false alarms.
 *		  A rate whose divisor is 0 is printed as 0.
*/
void OutputInfo(RecorderVector &recorderContainer, ClusterVector &clusterContainer, 
				const unsigned int &calcAnormal, const unsigned int &calcNormal)
{
	// size() yields size_t; it is converted explicitly so that the argument
	// matches the conversion specifier on every platform.
	printf("总的记录的个数是:%lu\n", (unsigned long)recorderContainer.size());
	printf("其中簇的个数有 %lu\n\n", (unsigned long)clusterContainer.size());

	unsigned int correct = 0;
	unsigned int incorrect = 0;

	// Clusters at or below this size are treated as attack clusters.
	const double attackThreshold = recorderContainer.size() * 0.015;

	ClusterVector::iterator clusterIterator;
	for (clusterIterator = clusterContainer.begin(); 
	clusterIterator != clusterContainer.end(); ++clusterIterator)
	{
		SubContainer *cluster = *clusterIterator;
		printf("该簇中共有记录 %u\n", cluster->recordNum);

		// Count the members whose own label is "normal.".
		int num = 0;
		for (const Recorder *pList = cluster->pFirstRec; NULL != pList; pList = pList->next)
		{
			if (0 == strcmp("normal.", pList->is_normal))
			{
				num++;
			}
		}
		printf("其中正常记录 %d\n\n", num);

		if (cluster->recordNum <= attackThreshold)
		{
			// Attack cluster: its non-normal members are correct detections,
			// its normal members are false alarms.
			correct += cluster->recordNum - num;
			incorrect += num;
			strcpy(cluster->is_normal, "anormal.");
		}
		else
		{
			// Normal cluster.
			strcpy(cluster->is_normal, "normal.");
		}
	}

	// Guard the divisions: an input without attacks (or without normal
	// records) would otherwise print nan or inf.
	const float detectionRate = (0 != calcAnormal) ? correct / (float)calcAnormal * 100 : 0.0f;
	const float falseAlarmRate = (0 != calcNormal) ? incorrect / (float)calcNormal * 100 : 0.0f;
	printf("该阈值下的检测率是: %%%.3f\n", detectionRate);
	printf("该阈值下的误报率是: %%%.3f\n", falseAlarmRate);
}

/**
 * @函数名	: GetStadardize
 * @brief	: 
 * @return	: void 
 * @param	: FILE *pTest
 * @param	: RecorderVector &recorderTest
 * @param	: unsigned int &calcNormal
 * @remark	: 集成读记录标准化
*/
void GetStadardize(FILE *pTest, RecorderVector &recorderTest, unsigned int &calcNormal,
				   Recorder &testAverage, Recorder &std_vector)
{	
	Recorder *pTestRecorder = NULL;
	while (!feof(pTest))
	{
		pTestRecorder = (Recorder *)malloc(sizeof(Recorder));
		if (NULL == pTestRecorder)
		{
			printf("分配内存错误。\n");
			return;
		}
		memset(pTestRecorder, 0, sizeof(Recorder));
		
		//读入一条记录
		ReadData(pTest, pTestRecorder, testAverage, calcNormal);
		
		//把该指针压入容器
		recorderTest.push_back(pTestRecorder);
	}
	
	//求平均
	GetAverage(testAverage, recorderTest.size());
	
	//标准化
	Standardize(recorderTest, testAverage, std_vector);
}

void MinDis(Recorder *pRecorder, ClusterVector &clusterContainer, double &minTemp, 
			unsigned int &finalCluster)
{
	double numDif = CalcNumDif(pRecorder, clusterContainer.at(0)->clusterCenter);
	double feaDif = CalcFeaDif(pRecorder, clusterContainer.at(0)->pFirstRec,
		clusterContainer.at(0)->recordNum);
	

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -