⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 browsedir.cpp

📁 贝叶斯学习算法源码
💻 CPP
📖 第 1 页 / 共 3 页
字号:
	{
		memset(WordReservoirInfo[ClassCnt - 1].ClassFielName,0,100);
		strcpy(WordReservoirInfo[ClassCnt - 1].ClassFielName,str);
		WordReservoirInfo[ClassCnt - 1].WordsSum = CN;
		
		if (ClassCnt - 1 == 0)
		{
			WordReservoirInfo[ClassCnt - 1].ClassFlag = 0;
			WordReservoirInfo[ClassCnt - 1].FileSum = m_nFileCount;
		}
		else if (ClassCnt - 1 == 1)
		{
			WordReservoirInfo[ClassCnt - 1].ClassFlag = 1;
			WordReservoirInfo[ClassCnt - 1].FileSum = m_nFileCount;
		}
	}
	delete M;
	delete swap;
	return 0;
}


/*
 * Performs one bottom-up merge pass over x[0 .. n-1], ordering entries by
 * strcmp() on Item::str (ascending).
 *
 * Consecutive runs of length k are merged pairwise into swap[]; whatever is
 * left without a partner run at the end is copied through unchanged.
 * swap[] must have room for at least n entries. Always returns 0.
 */
int CBrowseDir::merge(Item x[], Item swap[], int k, int n)
{
	int out = 0;         // next free slot in swap[]
	int leftBegin = 0;   // first index of the left run of the current pair
	int a, b;

	// Keep going while a right-hand run exists for the current left run.
	while (leftBegin + k < n)
	{
		const int rightBegin = leftBegin + k;
		const int leftEnd    = rightBegin - 1;
		int rightEnd = rightBegin + k - 1;
		if (rightEnd > n - 1)
			rightEnd = n - 1;          // last run may be shorter than k

		a = leftBegin;
		b = rightBegin;
		while (a <= leftEnd && b <= rightEnd)
		{
			// Equal keys are taken from the left run first.
			if (strcmp(x[a].str, x[b].str) > 0)
			{
				Copy(swap[out], x[b]);
				++b;
			}
			else
			{
				Copy(swap[out], x[a]);
				++a;
			}
			++out;
		}

		// Flush whichever run still has entries.
		for (; a <= leftEnd; ++a, ++out)
			Copy(swap[out], x[a]);
		for (; b <= rightEnd; ++b, ++out)
			Copy(swap[out], x[b]);

		leftBegin = rightEnd + 1;
	}

	// Trailing entries that had no partner run.
	for (a = leftBegin; a < n; ++a, ++out)
		Copy(swap[out], x[a]);

	return 0;
}

/*
 * Field-by-field copy of M into K: the word string plus the freq, density,
 * Sequence, FileCnt, flag, gain and SetInfo members. Always returns 0.
 */
int CBrowseDir::Copy(Item &K, const Item M)
{
	// Scalar members.
	K.SetInfo  = M.SetInfo;
	K.gain     = M.gain;
	K.flag     = M.flag;
	K.FileCnt  = M.FileCnt;
	K.Sequence = M.Sequence;
	K.density  = M.density;
	K.freq     = M.freq;

	// The word itself (NUL-terminated C string).
	strcpy(K.str, M.str);

	return 0;
}


/*
 * Classifies a single character.
 *
 * Returns 1 for a letter, 2 for a decimal digit, 3 for '.', and 0 for
 * anything else.
 */
int CBrowseDir::CharDeal(char ch)
{
	// The <ctype.h> classifiers require a value representable as
	// unsigned char (or EOF); a plain char holding a byte >= 0x80 may be
	// negative, so convert before classifying.
	const unsigned char uc = static_cast<unsigned char>(ch);

	if (isalpha(uc))
		return 1;
	if (isdigit(uc))
		return 2;
	if (ch == '.')
		return 3;
	return 0;
}

/*
 * Appends one summary line for the current class entry
 * (WordReservoirInfo[ClassCnt - 1]) to the given file:
 *
 *     <ClassFielName> <ClassFlag> <FileSum> <WordsSum>\n
 *
 * FileSum is refreshed from GetFileCount() before the line is written.
 * Returns -1 when filename is null, 0 on success. If the file cannot be
 * opened, an error is printed and the process exits with status 1.
 */
int CBrowseDir::WriteInfo(char *filename)
{
	if (filename == 0)
	{
		return -1;
	}

	ofstream outCredit(filename, ios::out | ios::app);
	if (outCredit.fail())
	{
		cerr << "Output File could not be opened."  << endl;
		exit(1);
	}

	// Bring the stored file count up to date before reporting it.
	WordReservoirInfo[ClassCnt - 1].FileSum = GetFileCount();

	// One space-separated record per call.
	outCredit << WordReservoirInfo[ClassCnt - 1].ClassFielName << ' ';
	outCredit << WordReservoirInfo[ClassCnt - 1].ClassFlag << ' ';
	outCredit << WordReservoirInfo[ClassCnt - 1].FileSum << ' ';
	outCredit << WordReservoirInfo[ClassCnt - 1].WordsSum << '\n';

	return 0;
}


void CBrowseDir::ExtractFeature(const char *filename,const char *output )
{
	if (filename == NULL )
		return ;

	Feature *M = new Feature[70000];
	int i;
	
	char tempstr[100];
	memset(tempstr,0,100);
	strcpy(tempstr,m_szInitDir);
	strcat(tempstr,filename);
	int Cnt = CalculateGain(tempstr ,M, 70000,output);

	Mergesort(M,Cnt);

	memset(tempstr,0,100);
	strcpy(tempstr,m_szInitDir);
	strcat(tempstr,"Sort_accord_Gain.dat");

	ofstream outFile(tempstr,ios::out| ios::app);
	if(!outFile)
	{
		cerr << "Output File could not be opened."  << endl;
		exit(1);
	}

	for( i = 0; i < Cnt; i++)
	{	
		outFile << M[i].str << ' ' << M[i].gain << ' ' 
			<< M[i].NormalMail << ' ' << M[i].NormalMail_Prior	<< ' ' 
			<< M[i].UnNormalMail << ' ' << M[i].UnNormalMail_Prior << '\n';		
	}
	delete M;
	
}


/*
 * Document pre-processing.
 *
 * Pass 1 copies `filename` to an intermediate file, lower-casing every letter
 * and replacing every other character with a space. Pass 2 re-reads that
 * file word by word and stores each word in M[] when it is at least three
 * characters long, Seek() returns 0 for it and Del_StopWord() returns false.
 * Reading stops at the first word longer than 39 characters.
 *
 * filename    : input document; null yields -1.
 * M           : destination array for the accepted words.
 * Num         : not used by this routine.
 *               NOTE(review): presumably the capacity of M; nothing here
 *               limits writes to it -- confirm with the callers.
 * Outfilename : path of the intermediate file; when null, "Cal.dat" in the
 *               directory of `filename` (text up to the last '\\') is used.
 *
 * Returns the number of entries written to M, or -1 when a path does not fit
 * the internal buffer or a file cannot be opened.
 */
int CBrowseDir::PreDeal(const char *filename,Item M[],int Num, char *Outfilename) // document pre-processing routine
{
	if(filename == 0)
		return -1;

	char name[100];
	memset(name,0,sizeof(name));

	if(Outfilename == 0)
	{
		const size_t fullLen = strlen(filename);
		if(fullLen >= sizeof(name))
			return -1;
		strcpy(name,filename);

		// Cut the name back to its directory part. Stop at the start of
		// the buffer when the path contains no backslash.
		int temp = static_cast<int>(fullLen) - 1;
		while(temp >= 0 && name[temp] != '\\')
			temp --;
		name[temp + 1] = 0;

		if(static_cast<size_t>(temp + 1) + strlen("Cal.dat") >= sizeof(name))
			return -1;
		strcat(name,"Cal.dat");
	}
	else
	{
		if(strlen(Outfilename) >= sizeof(name))
			return -1;
		strcpy(name,Outfilename);
	}

	FILE *r=fopen(filename,"r");       // input file
	if( r == 0 )
	{
		return -1;
	}

	FILE *w1=fopen(name,"w+");       // intermediate file, written then re-read
	if( w1 == 0 )
	{
		fclose(r);                   // do not leak the input handle
		return -1;
	}

	// Pass 1: letters are lower-cased, everything else becomes a space.
	// fgetc() yields values in the unsigned char range, which is what
	// isalpha()/tolower() require.
	int ch;
	while((ch = fgetc(r)) != EOF)
	{
		if(isalpha(ch))
			fputc(tolower(ch),w1);
		else
			fputc(' ',w1);
	}
	fclose(r);

	rewind(w1);

	// Pass 2: collect words. The field width keeps fscanf inside str[].
	char str[100] = "UnInit";
	int i = 0;
	while(fscanf(w1,"%99s",str) == 1)
	{
		const int Len = static_cast<int>(strlen(str));
		if(Len > 39)
		{
			// Over-long token: stop processing this file.
			break;
		}

		// Stop-word filtering is part of the acceptance test.
		if(Len >= 3  && Seek( M,str,i)  == 0  && !Del_StopWord(str,m_StopWordNum))
		{
			memset(M[i].str,0,40);
			strcpy(M[i].str,str);
			M[i].freq = 1;
			M[i].FileCnt = 1;
			M[i].density = 0.0;
			M[i].gain = 0.0;
			M[i].flag = 0;
			M[i].SetInfo = ClassCnt - 1;

			i++;
		}
	}

	fclose(w1);

	return i;
}

/*
 * One bottom-up merge pass over x[0 .. n-1], ordered by descending gain.
 *
 * Adjacent runs of length k are merged pairwise into swap[]; entries left
 * without a partner run are copied through as they are. Only the fields
 * handled by CopyFeature() (str, gain, NormalMail, NormalMail_Prior,
 * UnNormalMail, UnNormalMail_Prior) are transferred. swap[] must hold at
 * least n entries. Always returns 0.
 */
int CBrowseDir::MergeInGain(Feature x[], Feature swap[], int k, int n)    // descending order
{
	int out = 0;        // next free slot in swap[]
	int runStart = 0;   // first index of the left run of the current pair
	int a, b;

	while (runStart + k < n)
	{
		const int rightStart = runStart + k;
		const int leftLast   = rightStart - 1;
		int rightLast = rightStart + k - 1;
		if (rightLast > n - 1)
			rightLast = n - 1;         // final run may be short

		a = runStart;
		b = rightStart;
		while (a <= leftLast && b <= rightLast)
		{
			// The left entry wins only when its gain is strictly larger.
			if (x[a].gain * 1000 > x[b].gain * 1000)
			{
				CopyFeature(swap[out], x[a]);
				++a;
			}
			else
			{
				CopyFeature(swap[out], x[b]);
				++b;
			}
			++out;
		}

		// Drain the run that still has entries.
		for (; a <= leftLast; ++a, ++out)
			CopyFeature(swap[out], x[a]);
		for (; b <= rightLast; ++b, ++out)
			CopyFeature(swap[out], x[b]);

		runStart = rightLast + 1;
	}

	// Entries beyond the last complete pair.
	for (a = runStart; a < n; ++a, ++out)
		CopyFeature(swap[out], x[a]);

	return 0;
}

/*
 * Sorts feature[0 .. Num-1] by descending gain using repeated
 * MergeInGain() passes with doubling run length.
 *
 * Returns 0 on success (including when there is nothing to sort) and -1
 * when the scratch buffer could not be obtained.
 */
int CBrowseDir::Mergesort(Feature feature[], int Num)
{
	// Zero or one entry is already sorted; this also keeps a non-positive
	// count away from the array allocation below.
	if(Num <= 1)
		return 0;

	Feature *swap = new Feature[Num];
	if(swap == 0)
		return -1;

	int k,i;
	for(k = 1; k < Num; k = k * 2)
	{
		MergeInGain(feature,swap,k,Num);

		// Copy the merged pass back so the next pass reads from feature[].
		for( i = 0;i < Num ;i ++)
			CopyFeature(feature[i],swap[i]);
	}

	// swap came from new[], so it must be released with delete[].
	delete[] swap;
	return 0;
}

/*
 * Sorts X[0 .. Num-1] by word (ascending strcmp order), folds duplicate
 * words into a single entry, and compacts the survivors to the front of X.
 *
 * When an entry is folded into an earlier one, its density and freq are
 * added to the earlier entry, and the earlier entry's FileCnt is incremented
 * if the two Sequence values differ. Slots after the surviving entries are
 * blanked (empty string, all numeric fields set to -1).
 *
 * Returns the number of surviving entries, 0 when Num is not positive, or -1
 * when the scratch buffer could not be obtained.
 */
int CBrowseDir::Unique(Item X[], int Num)
{
	if (Num <= 0)
		return 0;

	// One scratch buffer sized to the input serves both the sort and the
	// compaction step.
	Item *scratch = new Item[Num];
	if (scratch == 0)
		return -1;

	int Value = 0;
	int i ,k;

	// Bottom-up merge sort over all Num entries.
	for (k = 1; k < Num; k = k * 2)
	{
		merge(X,scratch,k,Num);
		for( i = 0;i < Num;i ++)
		{
			Copy(X[i],scratch[i]);
		}
		cout << endl;    // existing per-pass console output, kept as is
	}

	// Fold duplicates. Each entry is compared with its next four
	// neighbours only.
	// NOTE(review): a run of more than five equal words is not folded
	// into a single entry by this window -- confirm whether that matters
	// for the callers.
	for( i = 0; i < Num; i++)
	{
		for(int j = 1; j < 5; j ++)
		{
			if(i + j < Num )
			{
				if(strcmp(X[i].str,X[i + j].str) == 0)
				{
					X[i].density += X[i + j].density;
					X[i].freq += X[i + j].freq;

					if(X[i].Sequence != X[i + j].Sequence)
						X[i].FileCnt ++;
					X[i + j].flag = -1;    // mark as folded away
				}
			}
		}
	}

	// Gather the entries that were not folded away ...
	for( i = 0; i < Num; i++)
	{
		if(X[i].flag != -1)
		{
			Copy(scratch[Value],X[i]);
			Value ++;
		}
	}

	// ... and move them to the front of X.
	for(i = 0 ;i < Value; i ++)
	{
		Copy(X[i],scratch[i]);
	}

	// Blank the slots that no longer hold a valid entry.
	for(i = Value ;i < Num; i ++)
	{
		memset(X[i].str,0,40);
		X[i].SetInfo = -1;
		X[i].flag = -1;
		X[i].freq = -1;
		X[i].gain = -1;
		X[i].density = -1;
		X[i].FileCnt = -1;
		X[i].Sequence = -1;
	}

	// scratch came from new[], so it must be released with delete[].
	delete[] scratch;
	return Value;
}


/*
 * Sorts M[0 .. N-1] by descending gain with a gap sequence N/2, N/4, ..., 1.
 *
 * For every position the routine walks back through the gap-spaced
 * predecessors and exchanges any pair whose earlier element has the smaller
 * gain. Elements are exchanged through CopyFeature(). Always returns 0.
 */
int CBrowseDir::ShellSort(Feature M[], int N)    // descending order of gain
{
	int step = N / 2;
	while (step > 0)
	{
		for (int upper = step; upper < N; ++upper)
		{
			int lower = upper - step;
			while (lower >= 0)
			{
				if (M[lower].gain < M[lower + step].gain)
				{
					// Exchange the two entries via a temporary.
					Feature held;
					CopyFeature(held, M[lower]);
					CopyFeature(M[lower], M[lower + step]);
					CopyFeature(M[lower + step], held);
				}
				lower -= step;
			}
		}
		step /= 2;
	}
	return 0;
}

/*
 * Copies the word string, the gain and the four mail statistics
 * (NormalMail, NormalMail_Prior, UnNormalMail, UnNormalMail_Prior) from X
 * into K. Always returns 0.
 */
int CBrowseDir::CopyFeature(Feature &K, const Feature X)
{
	// Mail statistics.
	K.UnNormalMail_Prior = X.UnNormalMail_Prior;
	K.UnNormalMail       = X.UnNormalMail;
	K.NormalMail_Prior   = X.NormalMail_Prior;
	K.NormalMail         = X.NormalMail;

	// Gain and the word itself (NUL-terminated C string).
	K.gain = X.gain;
	strcpy(K.str, X.str);

	return 0;
}


int CBrowseDir::MergeInString(Feature x[], Feature swap[], int k, int n)
{
	
	int i,j,l1,u1,l2,u2,m;
	l1 = 0;
	m = 0;
	while(l1 + k < n)
	{
		l2 = l1 + k;
		u1 = l2 - 1;
		u2 = (l2 + k - 1 <= n - 1) ? (l2 + k - 1) :(n - 1);	

		for( i  = l1,j = l2; i <= u1 && j <= u2 ;m++)
		{
			if(strcmp(x[i].str,x[j].str) >= 0)
			{
				//Copy(swap[m],x[i]);
				strcpy(swap[m].str,x[i].str);
				swap[m].gain = x[i].gain;
				swap[m].NormalMail = x[i].NormalMail;			
				swap[m].NormalMail_Prior = x[i].NormalMail_Prior;
				swap[m].UnNormalMail = x[i].UnNormalMail;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -