⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 1.cpp

📁 一个利用KDD1999数据集而完成的改进K-means聚类算法的实现.
💻 CPP
📖 第 1 页 / 共 2 页
字号:
	double totalDif = sqrt((numDif + feaDif) / 41.0);
	minTemp = totalDif;                    //存储最小的距离
	unsigned int cluster = -1;             //记录哪个簇的记录更近
	finalCluster = 0;                      //最终选定的簇
	
	ClusterVector::iterator clusterIterator;
	for (clusterIterator = clusterContainer.begin(); 
	clusterIterator != clusterContainer.end(); clusterIterator++)
	{
		++cluster;
		numDif = CalcNumDif(pRecorder, (*clusterIterator)->clusterCenter);
		feaDif = CalcFeaDif(pRecorder, (*clusterIterator)->pFirstRec, 
			(*clusterIterator)->recordNum);
		totalDif = sqrt((numDif + feaDif) / 41.0);
		
		if (totalDif < minTemp)
		{
			minTemp = totalDif;
			finalCluster = cluster;
		}
	}
}

// Rescales one attribute in place: value <- (value - mean) / deviation.
// When the deviation compares equal to zero the attribute is left untouched,
// which avoids dividing by zero.
template <typename FieldT>
static void RescaleField(FieldT &value, const FieldT &mean, const FieldT &deviation)
{
	if (0 != deviation)
	{
		value = (value - mean) / deviation;
	}
}

// Standardizes a test record in place using statistics gathered elsewhere.
//
//   avg_vector     per-attribute mean values
//   std_vector     per-attribute standard deviations
//   pTestRecorder  record to rescale; modified in place
//
// The attributes touched are: duration, src_dst_bytes[0..1], w_u_h_n[0..3],
// num_root[0..7], srv_etc[0..8], dst_host[0..1] and host[0..7]. Any attribute
// whose deviation is zero keeps its original value.
void DetectionStadardize(Recorder avg_vector, Recorder std_vector, Recorder *pTestRecorder)
{
	int idx;

	RescaleField(pTestRecorder->duration, avg_vector.duration, std_vector.duration);

	for (idx = 0; idx < 2; ++idx)
	{
		RescaleField(pTestRecorder->src_dst_bytes[idx],
			avg_vector.src_dst_bytes[idx], std_vector.src_dst_bytes[idx]);
	}

	for (idx = 0; idx < 4; ++idx)
	{
		RescaleField(pTestRecorder->w_u_h_n[idx],
			avg_vector.w_u_h_n[idx], std_vector.w_u_h_n[idx]);
	}

	for (idx = 0; idx < 8; ++idx)
	{
		RescaleField(pTestRecorder->num_root[idx],
			avg_vector.num_root[idx], std_vector.num_root[idx]);
	}

	for (idx = 0; idx < 9; ++idx)
	{
		RescaleField(pTestRecorder->srv_etc[idx],
			avg_vector.srv_etc[idx], std_vector.srv_etc[idx]);
	}

	for (idx = 0; idx < 2; ++idx)
	{
		RescaleField(pTestRecorder->dst_host[idx],
			avg_vector.dst_host[idx], std_vector.dst_host[idx]);
	}

	for (idx = 0; idx < 8; ++idx)
	{
		RescaleField(pTestRecorder->host[idx],
			avg_vector.host[idx], std_vector.host[idx]);
	}
}

int main()
{
	clock_t start, finish;
	double  duration;
	start = clock();

	FILE *fp = NULL;
	
	RecorderVector recorderContainer;              //所有记录容器
	RecorderVector::iterator recorderIterator;     //所有记录容器迭代器     

	unsigned int calcNormal = 0;                   //该数据集中总的正常
	unsigned int calcAnormal = 0;                  //该数据集中总的异常

	Recorder avg_vector;                           //存储训练集的平均值
	Recorder std_vector;                           //存储训练集的标准差用于检测阶段的使用
	memset(&avg_vector, 0, sizeof(avg_vector));
	memset(&std_vector, 0, sizeof(std_vector));

	fp = fopen("traning.txt", "r");
	if(NULL == fp)
	{
		printf("打开文件错误.\n");
		return -1;
	}

	GetStadardize(fp, recorderContainer, calcNormal, avg_vector, std_vector);
	
	//统计异常记录个数
	calcAnormal = recorderContainer.size() - calcNormal;
	
	//这里开始分簇
	ClusterVector clusterContainer;              //子簇容器

	unsigned int clusterNum = 0;                 //记录簇的个数
	float widthThreshold = 2.2f;              //宽度阈值1.4065f

	for (recorderIterator = recorderContainer.begin();
	recorderIterator != recorderContainer.end(); ++recorderIterator)
	{
		if (0 == clusterNum)                     //生成第一个簇
		{
			if (!SetNewCluster(*recorderIterator, clusterContainer))
			{
				return -1;
			}

			++clusterNum;
		}
		else                                     //不是第一个簇,要判断生成新的簇或找距离最小
		{
			//首先找到距离最小的簇
			//先计算数值属性的距离再考虑分类属性的距离
			double minTemp;                        //存储最小的距离
			unsigned int finalCluster = 0;         //最终选定的簇

			//计算离该记录距离最近的簇
			MinDis(*recorderIterator, clusterContainer, minTemp, finalCluster);

			//如果这个距离大于阈值则生成新的簇
			if (minTemp > widthThreshold)
			{
				if (!SetNewCluster(*recorderIterator, clusterContainer))
				{
					return -1;
				}

				++clusterNum;
			}
			//否则插入最小距离簇的末尾
			else
			{
				//加入簇末尾
				clusterContainer.at(finalCluster)->pLastRec->next = *recorderIterator;
				
				//更新簇的最后一个记录和簇中记录的个数
				clusterContainer.at(finalCluster)->pLastRec = *recorderIterator;
				clusterContainer.at(finalCluster)->recordNum += 1;

				//更新簇的质心
				UpdateCluster(clusterContainer.at(finalCluster));

			}
		}
	}

    OutputInfo(recorderContainer, clusterContainer, calcAnormal, calcNormal);
	finish = clock();
	duration = (double)(finish - start) / CLOCKS_PER_SEC;


	//测试聚类效果
	{
		FILE *pTest = fopen("test.txt", "r");
		if (NULL == pTest)
		{
			printf("打开测试文件失败.\n");
			return -1;
		}
		start = clock();

		RecorderVector recorderTest;

		RecorderVector::iterator testIterator;
		unsigned int testNormal = 0;           //测试集中正常的记录个数
		double minTemp;                        //存储最小的距离
		unsigned int finalCluster = 0;         //最终选定的簇
		int correct = 0;
		int incorrect = 0;

		Recorder testAverage;
		memset(&testAverage, 0, sizeof(testAverage));
		
		Recorder *pTestRecorder = NULL;
		while (!feof(pTest))
		{
			pTestRecorder = (Recorder *)malloc(sizeof(Recorder));
			if (NULL == pTestRecorder)
			{
				printf("分配内存错误。\n");
				return -1;
			}
			memset(pTestRecorder, 0, sizeof(Recorder));
			
			//读入一条记录
			ReadData(pTest, pTestRecorder, testAverage, testNormal);
			
			DetectionStadardize(avg_vector, std_vector, pTestRecorder);
			
			//把该指针压入容器
			recorderTest.push_back(pTestRecorder);
		}
		
        int num[53] = {0};
		int service[5] = {0};

		for (testIterator = recorderTest.begin(); testIterator != recorderTest.end(); 
		++testIterator)
		{
			//计算距离最近的簇
			MinDis(*testIterator, clusterContainer, minTemp, finalCluster);
			
			if (0 != strcmp((*testIterator)->is_normal, "normal.") && 
				0 == strcmp(clusterContainer.at(finalCluster)->is_normal, "anormal.")) 
			{
				correct++;

				//////////////////////////////////////////////////////////////////////////
				if (0 == strcmp("http", (*testIterator)->service))
				{
					if (0 == strcmp("backt.", (*testIterator)->is_normal))
					{
						num[0]++;
					}
					if (0 == strcmp("back.", (*testIterator)->is_normal))
					{
						num[1]++;
					}
					if (0 == strcmp("phf.", (*testIterator)->is_normal))
					{
						num[2]++;
					}
					if (0 == strcmp("ipsweep.", (*testIterator)->is_normal))
					{
						num[3]++;
					}
					if (0 == strcmp("nmap.", (*testIterator)->is_normal))
					{
						num[4]++;
					}
					if (0 == strcmp("portsweep.", (*testIterator)->is_normal))
					{
						num[5]++;
					}
					if (0 == strcmp("satan.", (*testIterator)->is_normal))
					{
						num[6]++;
					}
				}
				else if (0 == strcmp("smtp", (*testIterator)->service)
					|| 0 == strcmp("pop_3", (*testIterator)->service))
				{
					if (0 == strcmp("smtp", (*testIterator)->service) && 
						0 == strcmp("neptunet.", (*testIterator)->is_normal))
					{
						num[7]++;
					}
					if (0 == strcmp("smtp", (*testIterator)->service) && 
						0 == strcmp("neptune.", (*testIterator)->is_normal))
					{
						num[8]++;
					}
					if (0 == strcmp("pop_3", (*testIterator)->service) && 
						0 == strcmp("neptunet.", (*testIterator)->is_normal))
					{
						num[9]++;
					}
					if (0 == strcmp("pop_3", (*testIterator)->service) && 
						0 == strcmp("neptune.", (*testIterator)->is_normal))
					{
						num[10]++;
					}
					if (0 == strcmp("ipsweep.", (*testIterator)->is_normal))
					{
						num[11]++;
					}
					if (0 == strcmp("smtp", (*testIterator)->service) && 
						0 == strcmp("nmap.", (*testIterator)->is_normal))
					{
						num[12]++;
					}
					if (0 == strcmp("pop_3", (*testIterator)->service) && 
						0 == strcmp("nmap.", (*testIterator)->is_normal))
					{
						num[13]++;
					}
					if (0 == strcmp("smtp", (*testIterator)->service) && 
						0 == strcmp("portsweep.", (*testIterator)->is_normal))
					{
						num[14]++;
					}
					if (0 == strcmp("pop_3", (*testIterator)->service) && 
						0 == strcmp("portsweep.", (*testIterator)->is_normal))
					{
						num[15]++;
					}
					if (0 == strcmp("smtp", (*testIterator)->service) && 
						0 == strcmp("satan.", (*testIterator)->is_normal))
					{
						num[16]++;
					}
					if (0 == strcmp("pop_3", (*testIterator)->service) && 
						0 == strcmp("satan.", (*testIterator)->is_normal))
					{
						num[17]++;
					}
				}
				else if (0 == strcmp("ftp", (*testIterator)->service))
				{
					if (0 == strcmp("neptunet.", (*testIterator)->is_normal))
					{
						num[18]++;
					}
					if (0 == strcmp("neptune.", (*testIterator)->is_normal))
					{
						num[19]++;
					}
					if (0 == strcmp("buffer_overflow.", (*testIterator)->is_normal))
					{
						num[20]++;
					}
					if (0 == strcmp("loadmodule.", (*testIterator)->is_normal))
					{
						num[21]++;
					}
					if (0 == strcmp("rootkit.", (*testIterator)->is_normal))
					{
						num[22]++;
					}
					if (0 == strcmp("ftp_write.", (*testIterator)->is_normal))
					{
						num[23]++;
					}
					if (0 == strcmp("multihop.", (*testIterator)->is_normal))
					{
						num[24]++;
					}
					if (0 == strcmp("warezmaster.", (*testIterator)->is_normal))
					{
						num[25]++;
					}
					if (0 == strcmp("ipsweep.", (*testIterator)->is_normal))
					{
						num[26]++;
					}
					if (0 == strcmp("nmap.", (*testIterator)->is_normal))
					{
						num[27]++;
					}
					if (0 == strcmp("portsweep.", (*testIterator)->is_normal))
					{
						num[28]++;
					}
					if (0 == strcmp("satan.", (*testIterator)->is_normal))
					{
						num[29]++;
					}
				}
				else if (0 == strcmp("telnet", (*testIterator)->service))
				{
					if (0 == strcmp("land.", (*testIterator)->is_normal))
					{
						num[30]++;
					}
					if (0 == strcmp("neptunet.", (*testIterator)->is_normal))
					{
						num[31]++;
					}
					if (0 == strcmp("neptune.", (*testIterator)->is_normal))
					{
						num[32]++;
					}
					if (0 == strcmp("buffer_overflow.", (*testIterator)->is_normal))
					{
						num[33]++;
					}
					if (0 == strcmp("loadmodule.", (*testIterator)->is_normal))
					{
						num[34]++;
					}
					if (0 == strcmp("perl.", (*testIterator)->is_normal))
					{
						num[35]++;
					}
					if (0 == strcmp("rootkit.", (*testIterator)->is_normal))
					{
						num[36]++;
					}
					if (0 == strcmp("guess_passwdt.", (*testIterator)->is_normal))
					{
						num[37]++;
					}
					if (0 == strcmp("guess_passwd.", (*testIterator)->is_normal))
					{
						num[38]++;
					}
					if (0 == strcmp("multihop.", (*testIterator)->is_normal))
					{
						num[39]++;
					}
					if (0 == strcmp("spy.", (*testIterator)->is_normal))
					{
						num[40]++;
					}
					if (0 == strcmp("ipsweep.", (*testIterator)->is_normal))
					{
						num[41]++;
					}
					if (0 == strcmp("nmap.", (*testIterator)->is_normal))
					{
						num[42]++;
					}
					if (0 == strcmp("portsweep.", (*testIterator)->is_normal))
					{
						num[43]++;
					}
					if (0 == strcmp("satan.", (*testIterator)->is_normal))
					{
						num[44]++;
					}
				}
				else if (0 == strcmp("icmp", (*testIterator)->pro_type))
				{
					if (0 == strcmp("pod.", (*testIterator)->is_normal))
					{
						num[45]++;
					}
					if (0 == strcmp("smurft.", (*testIterator)->is_normal))
					{
						num[46]++;
					}
					if (0 == strcmp("smurf.", (*testIterator)->is_normal))
					{
						num[47]++;
					}
					if (0 == strcmp("ipsweep.", (*testIterator)->is_normal))
					{
						num[48]++;
					}
					if (0 == strcmp("nmapt.", (*testIterator)->is_normal))
					{
						num[49]++;
					}
					if (0 == strcmp("nmap.", (*testIterator)->is_normal))
					{
						num[50]++;
					}
					if (0 == strcmp("portsweep.", (*testIterator)->is_normal))
					{
						num[51]++;
					}
					if (0 == strcmp("satan.", (*testIterator)->is_normal))
					{
						num[52]++;
					}
				}

			}
			if (0 == strcmp((*testIterator)->is_normal, "normal.") && 
				0 == strcmp(clusterContainer.at(finalCluster)->is_normal, "anormal."))
			{
				incorrect++;

				//对每种服务的误报率进行统计
				if (0 == strcmp((*testIterator)->service, "http"))
				{
					service[0]++;
				}
				else if (0 == strcmp((*testIterator)->service, "smtp")
					|| 0 == strcmp((*testIterator)->service, "pop_3"))
				{
					service[1]++;
				}
				else if (0 == strcmp((*testIterator)->service, "ftp"))
				{
					service[2]++;
				}
				else if (0 == strcmp((*testIterator)->service, "telnet"))
				{
					service[3]++;
				}
				else if (0 == strcmp((*testIterator)->pro_type, "icmp"))
				{
					service[4]++;
				}
			}
			
		}
		
		printf("%d\n", recorderTest.size());
		printf("\n检测阶段的检测率是:%%%.3f\n", correct / (float)(recorderTest.size() - 
			testNormal) * 100);
		printf("检测阶段的误报率是:%%%.3f\n", incorrect / (float)testNormal * 100);	
		fclose(pTest);
		finish = clock();
		duration = (double)(finish - start) / CLOCKS_PER_SEC;

	}

	fclose(fp);
	
	return 0;
}

















//向excel文件中输入数据
/*	FILE *pWtrie = fopen("excel.xls", "w");
if (NULL == pWtrie)
{
printf("生成文件失败.\n");
return -1;
}
fwrite("123\t", 1, sizeof("123\t"), pWtrie);
fwrite("123", 1, sizeof("123"), pWtrie);
	fwrite("\n123", 1, sizeof("\n123"), pWtrie);
*/

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -