⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 work-2-1.cpp

📁 从N个无序数据中找K个最大值的快速算法; 数据挖掘课程作业。
💻 CPP
字号:
// Work-2-1.cpp : 定义控制台应用程序的入口点。
//

#include "stdafx.h"

/*
Data Mining Homework 2-1,
	Design a program to find k largest itemsets in a database of N items.

	Input:
		Items:12,34,45,23,45,38,...,87
		k = 10
	Output:
		N = 45
		10 largest items:87,...,45,38
*/

int ReadItems( const char * file, int * buf, int count )
{
	ifstream ifile;

	ifile.open( file, ios::in );
	if( ! ifile.is_open() )
		return 0;

	int i = 0;
	while( ! ifile.eof() && i < count )
	{
		ifile >> buf[ i ];
		i ++;
	}

	ifile.close();
	return i;
}

int WriteItems( const char * file, int * buf, int count )
{
	ofstream ofile;

	ofile.open( file, ios::out );
	if( ! ofile.is_open() )
		return 0;

	for( int i = 0; i < count; i ++ )
	{
		ofile << buf[ i ] << endl;
	}

	ofile.close();
	return i;
}

//阈值法 && 最大值替代法
int FindLargest3( int * val, int N, int * rlt, int k )
{
	int cmpcount = 0;
	int valsum = 0;
	srand( clock() );
	for( int i = 0; i < 1000; i ++ )
	{
		cmpcount ++;
		valsum += val[ rand() * N / RAND_MAX ];
	}
	valsum /= 1000;

	int threshold = 2 * valsum * ( N - k ) / N;

	for( int i = 0; i < k; i ++ )
		rlt[ i ] = threshold;


	for( int i = 0; i < N; i ++ )
	{
		for( int j = 0; j < k; j ++ )
		{
			cmpcount ++;

			if( val[ i ] > rlt[ j ] )
			{
				if( j > 0 )
					rlt[ j - 1 ] = rlt[ j ];
			}
			else
			{
				if( j > 0 )
					rlt[ j - 1 ] = val[ i ];
				break;
			}
		}

		if( j == k )
			rlt[ k - 1 ] = val[ i ];
	}

	cout << "Item compare times statistic: " << cmpcount << endl;
	return k;
}

//阈值法
int FindLargest2( int * val, int N, int * rlt, int k )
{
	int valsum = 0;
	srand( clock() );
	for( int i = 0; i < 100; i ++ )
		valsum += val[ rand() * N / RAND_MAX ];
	valsum /= 100;

	int threshold = 2 * valsum * ( N - k ) / N;

	int cmpcount = 0;

	int j = 0;
	for( int i = 0; i < N; i ++ )
	{
		cmpcount ++;

		if( val[ i ] > threshold )
		{
			rlt[ j ] = val[ i ];
			j ++;
		}

	}

	cout << "Item compare times statistic: " << cmpcount << endl;
	return j;
}

//最大值替代法
int FindLargest1( int * val, int N, int * rlt, int k )
{
	for( int i = 0; i < k; i ++ )
		rlt[ i ] = 0;

	int cmpcount = 0;

	for( int i = 0; i < N; i ++ )
	{
		for( int j = 0; j < k; j ++ )
		{
			cmpcount ++;

			if( val[ i ] > rlt[ j ] )
			{
				if( j > 0 )
					rlt[ j - 1 ] = rlt[ j ];
			}
			else
			{
				if( j > 0 )
					rlt[ j - 1 ] = val[ i ];
				break;
			}
		}

		if( j == k )
			rlt[ k - 1 ] = val[ i ];
	}

	cout << "Item compare times statistic: " << cmpcount << endl;
	return k;
}

const int MAX_ITEMS = 1024 * 64;
const int MAX_RLT = 1024;

int _tmain(int argc, _TCHAR* argv[])
{
	int k, N;
	int val[ MAX_ITEMS ];
	int rlt[ MAX_RLT ];


	//Input items...
	cout << "Data Mining Homework 2-1:" << endl;
	cout << "	Design a program to find K largest itemsets in a database of N items." << endl;
	cout << endl;
	cout << "Get items from " << argv[ 1 ] << endl;

	N = ReadItems( argv[ 1 ], val, MAX_ITEMS );
	sscanf( argv[ 2 ], "%d", & k );

	//Output some info
	cout << "Items count N: " << N << endl;
	cout << "To find count K: " << k << endl;

	//Calculate...
	k = FindLargest3( val, N, rlt, k );

	//Output items...
	WriteItems( argv[ 3 ], rlt, k );

	cout << k << " largest itemsets is placed into " << argv[ 3 ] << endl;

	getchar();
	return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -