⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 querylsi.cpp

📁 根据潜在语义分析进行查询。将文本中的特征集合做LSI变换。
💻 CPP
📖 第 1 页 / 共 2 页
字号:
					  (SDD_Q(intCurrentTerm,1)*((log10((float)intNoDocs/intTermTotal[intCurrentTerm]))));
			}
	

	double cosine = 0.0;  //similarity result
	int intRankSize=1033; //how many documents do we want in query results
	double* dblResults = new double[intRankSize]; //ranked list of top cosine values
	int* intResultDoc = new int[intRankSize];  //ranked list of top documents
	double dblThreshold=-100.0; //any cosine less than this value will not be displayed
	int intMatch = 0; // contains array index where new value needs to be entered

	//initialise ranking arrays
	for (i = 0; i<(intRankSize-1); i++)
	{
		dblResults[i]=0.000000;
		intResultDoc[i]=0;
	}
   
	Matrix<double> SDD_A(intRank, intNoDocs);
	Matrix<double> SDD_newQ(intRank,1);	
	Matrix<double> SDD_newQ_t(1,intRank);
	Matrix<double> SDD_Cosine(intNoDocs,1);
	Matrix<double> SDD_Result(1,1);

	//first matrix required to compute similarity with query vector
   	SDD_A =  SDD_D * SDD_Y_t;
	
	//normalize Matrix SDD_A (intRank x intNoDocs)
	double dblSumTermFreq=0;
	double dblNormValue=0;
	for (intCurrentDoc = 1; intCurrentDoc <=intNoDocs; intCurrentDoc++)
		{
		dblSumTermFreq = 0;
		dblNormValue=0;
		for (intCurrentRank = 1; intCurrentRank<=intRank; intCurrentRank++)
			dblSumTermFreq=dblSumTermFreq+(SDD_A(intCurrentRank,intCurrentDoc) * SDD_A(intCurrentRank,intCurrentDoc));
			dblNormValue= sqrt(dblSumTermFreq);
	    
		for (intCurrentRank = 1; intCurrentRank<=intRank; intCurrentRank++)
			if (SDD_A(intCurrentRank,intCurrentDoc) != 0)
				SDD_A(intCurrentRank,intCurrentDoc)=
					(SDD_A(intCurrentRank,intCurrentDoc) / dblNormValue);
		}

	//project query vector in the same space as term-doc matrix
	SDD_newQ = SDD_X_t * SDD_Q;

	//transpose SDD_newQ vector
	for (intCurrentRank=1;intCurrentRank<=intRank;intCurrentRank++)
		SDD_newQ_t(1,intCurrentRank) = SDD_newQ(intCurrentRank,1);

	//calculate similarity for each document
	SDD_Cosine = SDD_newQ_t * SDD_A;
	
	//Now output the cosine for each document
	for (intCurrentDoc = 1; intCurrentDoc<=intNoDocs;  intCurrentDoc++)
	{
		cosine = SDD_Cosine(1,intCurrentDoc);
		
		//insert cosine into result list in rank order
		if (cosine > dblThreshold)
		{  
			intMatch = -1;
			for (i=0; i<intRankSize; i++)
			{
				if (cosine >= dblResults[i])
				{	intMatch=i;
					break; 
				}
			}
			 if (intMatch >= 0)
			 {				
			 //shift elements to the right and insert the new value
				for (j=intRankSize-1; j>=intMatch;j--)
				 {

					dblResults[j+1]=dblResults[j];
					intResultDoc[j+1] = intResultDoc[j];	
				 }	 //insert new value
				 dblResults[intMatch]=cosine;
				 intResultDoc[intMatch] = intCurrentDoc;	
			 }
		}
			
		
	}

    //The following code is used to calculate various precision measures.
	int intRelID = 0;    
	int intHitCount= 0;  //counter for the number of correct docs retrieved
	double dblRecall = 0.0; 
    
	double* dblPrecisionPercent = new double[11];  //for 11 point int-ave-precision
	double* dbl25_50_75Percent = new double[3];    //for 25/50/75 average precision

	//initialise arrays and local variables
	for (i = 0; i<11; i++)
		dblPrecisionPercent[i] = 0;

	for (i = 0; i<3; i++)
		dbl25_50_75Percent[i] = 0;

	double dblPrecision = 0.0;
	double dblAvePrecision = 0.0;
	double dblSumPrecision = 0.0;
	double dblRPrecision = 0.0;

	//Output details for each document in descending
	//order of similarity.  
	for (i = 0; i<intRankSize  ; i++)		
	{
		cout << "Rank : " << i+1 << " Document: " << intResultDoc[i] <<  " Cosine: " << dblResults[i];  
		intRelID = relList.find(intResultDoc[i]);
		if (intRelID > 0) 
		{
			intHitCount++;
		}
		if (intHitCount> 0 && intHitCount <= intRelDocTotal) 
		{
			dblRecall = (double)intHitCount/(double)intRelDocTotal;
			dblPrecision = (double)intHitCount/((double)i+1);
			if (intRelID > 0)
			{
				dblSumPrecision += dblPrecision;
			}
		}
		cout << " P= " << dblPrecision;
		cout << " R= " << dblRecall ;
		cout << " Hit total: " << intHitCount	<< endl;

		if (dblRecall <= .25)
			dbl25_50_75Percent[0] = dblPrecision;
		else if (dblRecall <= .50)
			dbl25_50_75Percent[1] = dblPrecision;
		else if (dblRecall <= .75)
			dbl25_50_75Percent[2] = dblPrecision;

		//100% recall  used for R-Precision
		if (intHitCount == intRelDocTotal)
			if ( dblPrecision > dblPrecisionPercent[10] )
				dblPrecisionPercent[10] = dblPrecision;

		//set 11 point interpolated average components
		if (dblRecall <= .10)
			{	if ( dblPrecision > dblPrecisionPercent[0] )
				dblPrecisionPercent[0] = dblPrecision; }
		else if (dblRecall > .10 && dblRecall <= .20) 
			{	if ( dblPrecision > dblPrecisionPercent[1] )
					dblPrecisionPercent[1] = dblPrecision;}
		else if (dblRecall > .20 && dblRecall <= .30)
			{	if ( dblPrecision > dblPrecisionPercent[2] )
					dblPrecisionPercent[2] = dblPrecision;}
		else if (dblRecall > .30 && dblRecall <= .40)
			{	if ( dblPrecision > dblPrecisionPercent[3] )
					dblPrecisionPercent[3] = dblPrecision;}
		else if (dblRecall > .40 && dblRecall <= .50)
			{	if ( dblPrecision > dblPrecisionPercent[4] )
					dblPrecisionPercent[4] = dblPrecision;}
		else if (dblRecall > .50 && dblRecall <= .60)
			{	if ( dblPrecision > dblPrecisionPercent[5] )
					dblPrecisionPercent[5] = dblPrecision;}
		else if (dblRecall > .60 && dblRecall <= .70)
			{	if ( dblPrecision > dblPrecisionPercent[6] )
					dblPrecisionPercent[6] = dblPrecision;}
		else if (dblRecall > .70 && dblRecall <= .80)
			{	if ( dblPrecision > dblPrecisionPercent[7] )
					dblPrecisionPercent[7] = dblPrecision;}
		else if (dblRecall > .80 && dblRecall <= .90)
			{	if ( dblPrecision > dblPrecisionPercent[8] )
					dblPrecisionPercent[8] = dblPrecision;}
		else if(dblRecall > .90 && dblRecall <= 1)
			{	if ( dblPrecision > dblPrecisionPercent[9] )
				dblPrecisionPercent[9] = dblPrecision;}

	}

	//calculate interpolated average precision and output results
	//for this query.
	for (i = 0; i< 11; i++)
		dblAvePrecision = dblAvePrecision + dblPrecisionPercent[i];
	dblAvePrecision = dblAvePrecision /11.0;
	cout << "Interpolated Average Precision for Query " << intCurrentQuery << " = " << dblAvePrecision << endl;	
    cout << "Non-InterpolatedPrecision for for Query " << intCurrentQuery << " = " << dblSumPrecision / (double) intRelDocTotal << endl;
	cout << "R_Precision = " << dblPrecisionPercent[10] << endl;
	cout << "Average based on mean of 25,50 and 75 percent recall: " <<  ((dbl25_50_75Percent[0]  + dbl25_50_75Percent[1] + dbl25_50_75Percent[2])/3.0) << endl;
	cout << "Precision at Recall levels 0 to 10" << endl;
	cout << "******************************************" << endl;
	for (i = 0; i< 11; i++)
		cout << dblPrecisionPercent[i] << endl;
	cout << "******************************************" << endl;
  
	return 0;
}


// Cosine similarity between one document column of a plain vector-space-model
// (VSM) term-document matrix and a query vector.
//
// Parameters:
//   termDocMatrix - the term-document matrix; read at (row, intCurrentDoc)
//   queryVector   - the query vector; read at (row, 1)
//   intCurrentDoc - column of termDocMatrix holding the document to compare
//   intNoTerms    - number of rows visited (rows 1 through intNoTerms)
//
// Returns the dot product of the two vectors divided by the product of their
// Euclidean lengths, or 0 when that product is not greater than zero.
double calculateCosine(const Matrix<double>& termDocMatrix, const Matrix<double>& queryVector, const int intCurrentDoc, const int intNoTerms)
{
	double docSquares = 0;    // running sum of squared document weights
	double querySquares = 0;  // running sum of squared query weights
	double dotProduct = 0;    // running sum of document weight * query weight

	int row = 1;
	while (row <= intNoTerms)
	{
		const double docWeight = termDocMatrix(row, intCurrentDoc);
		const double queryWeight = queryVector(row, 1);

		docSquares = docSquares + (docWeight * docWeight);
		querySquares = querySquares + (queryWeight * queryWeight);
		dotProduct = dotProduct + (docWeight * queryWeight);

		++row;
	}

	// Product of the two vector lengths; an all-zero vector makes this zero,
	// in which case the similarity is reported as 0 instead of dividing.
	const double lengthProduct = sqrt(docSquares) * sqrt(querySquares);
	if (!(lengthProduct > 0))
		return 0;

	return dotProduct / lengthProduct;
}

// Utility routine for converting a string into uppercase characters.
//
// Parameters:
//   inTerm - NUL-terminated C string, modified in place; may be null.
//
// Returns inTerm itself, so the call can be used inline in an expression.
//
// Only the characters up to (not including) the terminating NUL are touched,
// so the routine is safe for buffers of any size and leaves whatever follows
// the terminator untouched.
char* convertCase(char *inTerm)
{
	if (inTerm == 0)
		return inTerm;

	for (char* cursor = inTerm; *cursor != '\0'; ++cursor)
	{
		// toupper requires a value representable as unsigned char (or EOF);
		// passing a negative plain char is undefined behaviour.
		*cursor = static_cast<char>(toupper(static_cast<unsigned char>(*cursor)));
	}

	return inTerm;
}

// Utility routine for sorting an array into ascending order (insertion sort).
//
// Parameters:
//   a - array of doubles, sorted in place; may be null.
//   n - number of elements in the array; values below 2 leave it unchanged.
void sort (double* a,int n)
{
	if (a == 0)
		return;

	for (int i = 1; i < n; i++)
	{
		const double temp = a[i];

		// Elements a[0..i-1] are already sorted. Slide the ones larger than
		// temp one slot to the right, starting from position i.
		// j is declared outside the inner loop because it is needed afterwards.
		int j = i;
		for (; j > 0 && a[j-1] > temp; j--)
			a[j] = a[j-1];

		// j now marks the gap where temp belongs.
		a[j] = temp;
	}
}



// Writes the current local time and date to standard output as two lines:
//   Run time: hour:minute:second
//   Run date: month/day/year
// The numbers are printed as plain integers, without zero padding.
//
// This routine has been adapted from Eugene Wallingford's
// system_time_and_date.C code available at
// http://www.cs.uni.edu/~wallingf/teaching/052/code/
void printDateTime()
{
   // Read the clock, then split the raw timestamp into calendar fields.
   time_t now;
   time( &now );
   tm * fields = localtime( &now );

   // Time of day.
   cout << "Run time: "
        << fields->tm_hour << ':'
        << fields->tm_min << ':'
        << fields->tm_sec << endl;

   // Calendar date. tm stores the month as 0..11 and the year as an offset
   // from 1900, so both are adjusted before printing.
   cout << "Run date: "
        << (fields->tm_mon + 1) << '/'
        << fields->tm_mday << '/'
        << (fields->tm_year + 1900) << endl;
}




⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -