⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 similarity.cpp

📁 简单处理两个句子中的相似度对比问题,具体用法很简单,在main函数中写入句子即可
💻 CPP
字号:
#include "similarity.h"
#include "math.h"

// Lines read by loadbiaodian(); caltf() ignores every token equal to one of them
// (presumably the punctuation list, "biaodian" in pinyin -- TODO confirm against the data file).
vector<string> Biaodian;
// Lines read by loadStopword(); caltf() creates no term entry for tokens equal to one of them.
vector<string> Stopword;
// Vocabulary shared by all texts: caltf() appends each previously unseen token,
// and a token's index in this vector is the value stored in SWORD::wnum.
vector<string> WordArray;
// Term lists that Compute_sim() passes to caltf() for its first and second text.
vector<SWORD> TextArray1;
vector<SWORD> TextArray2;

/**
 * Dot product of two dense vectors that skips non-positive entries of vec_s.
 *
 * @param vecn   first vector; must hold at least n elements
 * @param vec_s  second vector; must hold at least n elements. Positions where
 *               vec_s[m] <= 0 contribute nothing to the result.
 * @param n      number of leading elements to process; n <= 0 yields 0
 * @return       sum of vecn[m] * vec_s[m] over all m in [0, n) with vec_s[m] > 0
 */
double sprod_ns(double *vecn,double *vec_s,int n) 
{
  // The former 'register' storage class was removed from the language in C++17.
  double sum = 0;
  for (int m = 0; m < n; ++m) {
    if (vec_s[m] > 0)
      sum += vecn[m] * vec_s[m];
  }
  return sum;
}

// Appends every line of the file `Filename` to the global Biaodian list.
// Returns 0 on success; if the file cannot be opened, prints a message to
// standard output and returns 1.
int loadbiaodian(string Filename)
{
	ifstream input(Filename.c_str());
	if (input)
	{
		// One list entry per line, in file order.
		for (string line; getline(input, line); )
			Biaodian.push_back(line);
		return 0;
	}

	cout << "Can not open " << Filename << endl;
	return 1;
}

// Appends every line of the file `Filename` to the global Stopword list.
// Returns 0 on success; if the file cannot be opened, prints a message to
// standard output and returns 1.
int loadStopword(string Filename)
{
	ifstream source(Filename.c_str());
	const bool opened = !(!source);
	if (!opened)
	{
		cout << "Can not open " << Filename << endl;
		return 1;
	}

	// One list entry per line, in file order.
	string entry;
	for (;;)
	{
		if (!getline(source, entry))
			break;
		Stopword.push_back(entry);
	}
	return 0;
}

int caltf(string text,vector<SWORD> &TextArray)
{
    char *seps=("\t\r\n 0 1 2 3 4 5 6 7 8 9 . , ? : & ");  
    int nTextLength=0;
	char *token;
	int wordnum = 0;
	char *buf_out;
	SWORD tmp;
	nTextLength = text.size();
    buf_out=new char[3*nTextLength];
	SSPS((char*)text.c_str(), buf_out);
	token = strtok( buf_out, seps);
	while( token != NULL )
	{
	   if(find(Biaodian,token)<0)
	   {
			if(strlen(token)>2)
			{
				if(find(Stopword,token)<0)
				{
					cout<<token<<endl;
					int pos = find(WordArray,token);  //查找在总词库中的位置
					if(pos >= 0)
					{
						int position = find(TextArray,pos);
						if(position >=0)
							TextArray[position].weight++;
						else
						{
							tmp.wnum = pos;	
							tmp.weight = 1;
							TextArray.push_back(tmp);
						}
					}
					else
					{
						WordArray.push_back(token);
						tmp.wnum = WordArray.size()-1;	
						tmp.weight = 1;
						TextArray.push_back(tmp);
					}
				}
			}
			wordnum++;
	   }
	    token = strtok( NULL, seps );
	}
	  
	  int size = TextArray.size();
	  for(int i=0;i<size;i++)
	  {
		  TextArray[i].weight=TextArray[i].weight/wordnum;
	  }
	  delete []buf_out;
	return 0;
}

double Compute_sim(string text1,string text2)
{
    double similarity;
    caltf(text1,TextArray1);
	caltf(text2,TextArray2);
    int nsize = WordArray.size();
	double *textfreq1,*textfreq2;
	double textlen1,textlen2;
	textlen1 = 0;
	textlen2 = 0;
	textfreq1 = new double[nsize+1];
	textfreq2 = new double[nsize+1];
	for(int i= 0;i<nsize;i++)
	{
		textfreq1[i]=0;
		textfreq2[i]=0;
	}
	int size = TextArray1.size();
	for(i = 0;i<size;i++)
	{
		textfreq1[TextArray1[i].wnum] = TextArray1[i].weight;
	//	cout << WordArray[TextArray1[i].wnum] <<"   "<<TextArray1[i].weight<<endl;
	}

	size = TextArray2.size();
	for(i = 0;i<size;i++)
	{
		textfreq2[TextArray2[i].wnum] = TextArray2[i].weight;
	//	cout << WordArray[TextArray2[i].wnum] <<"   "<<TextArray2[i].weight<<endl;
	}
	for(i= 0;i<nsize;i++)
	{
		textlen1 += textfreq1[i]*textfreq1[i];
		textlen2 += textfreq2[i]*textfreq2[i];
	}
	textlen1 = sqrt(textlen1);
	textlen2 = sqrt(textlen2);
	similarity = sprod_ns(textfreq1,textfreq2,nsize);

	if(textlen1*textlen2 != 0)
		similarity = similarity/(textlen1*textlen2);
	return similarity;
}

// Linear search for `word` in `Array`.
// Returns the index of the first element equal to `word`, or -1 if there is none.
int find(vector<string> Array,string word)
{
	const int count = Array.size();
	int index = 0;
	// Advance until a match is found or the list is exhausted.
	while (index < count && !(Array[index] == word))
		++index;
	return index < count ? index : -1;
}

int find(vector<SWORD> textarray,int pos)
{
	int size;
	size = textarray.size();
	for(int i = 0;i<size;i++)
	{
		if(textarray[i].wnum == pos)
		{
			return i;
		}
	}
	return -1;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -