⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 collaborative.c

📁 C编写的用来实现search engine的推荐功能
💻 C
字号:
/** * collaborative.c, collaborative filtering based recommendation Implementation * zhiyong zhang * louisville May 23th **/#include <stdio.h>#include <string.h>#include <strings.h>#include "recdef.h"#include "session.h"#define MAX_PROFILES 100typedef struct {	double weight;  	char link[256];  } PROFILE_LINK;typedef struct {	int profileNo;	int sessionNum;	int linkNum;  	PROFILE_LINK links[MAX_LINKS_PER_SESSION];  } PROFILE;static PROFILE profiles[MAX_PROFILES];static char profile_fname[256] = "/home/zhiyong/software/apache2/cgi-bin/profile.txt";static char profile_str[128] = "Profile";static double g_sim_threshold = 0.0;static int getProfiles(const char* fileName);static int compareWeight(const void* wt1, const void* wt2);static double getSimilarity(PROFILE profile1, PROFILE profile2);static int getProfiles(const char* fileName){	char line[1024];	char temp[1024];	char *ptr = NULL;	char *ptr_temp = NULL;	char *pptr = NULL;	FILE* fp = fopen(fileName, "r");	int profileNum;	int linkNum;	if(fp == NULL)	{		fprintf(stderr, "failed to open profile file %s\n", fileName);		return -1;	}	profileNum = 0;	linkNum = 0; 	while(fgets(line, 1024, fp) != NULL)	{		if((ptr=strstr(line,profile_str)) != NULL)		{			linkNum = 0; // reset the link number			//get the profile No			strcpy(temp, ptr+strlen(profile_str)+1);			pptr = strchr(temp, ',');			if(pptr == NULL) continue;			*pptr = '\0';			profiles[profileNum].profileNo = atoi(temp);			//get the number of sessions for the profile			ptr_temp = pptr+1;			pptr = strchr(ptr_temp, '=');			if(pptr == NULL) continue;			ptr_temp = pptr+2;			pptr = strchr(ptr_temp, ',');			if(pptr == NULL) continue;			*pptr = '\0';			profiles[profileNum].sessionNum = atoi(ptr_temp);			//get the number of significant urls for the profile			ptr_temp = pptr+1;			pptr = strchr(ptr_temp, '=');			ptr_temp = pptr+2;			pptr = strchr(ptr_temp, ':');			if(pptr == NULL) continue;			*pptr = '\0';			profiles[profileNum].linkNum = atoi(ptr_temp);			profileNum++;					}		else if((ptr=strchr(line,'{')) != NULL)		{			//get the link's significance			strcpy(temp, ptr+1);			pptr = strchr(temp, '\t');			if(pptr == NULL) continue;			*pptr = '\0';			if(profileNum > 0)			{				profiles[profileNum-1].links[linkNum].weight = atof(temp);				//fprintf(stdout, "temp:%s, weight:%lf\n", temp, profiles[profileNum-1].links[linkNum].weight);			}			else			{				profiles[profileNum].links[linkNum].weight = atof(temp);			}			//get the link's url name			ptr_temp = pptr+1;			pptr = strchr(ptr_temp, '/');			if(pptr == NULL) continue;			ptr_temp = pptr;			pptr = strchr(ptr_temp, '}');			if(pptr == NULL) continue;			*pptr = '\0';			if(profileNum > 0)			{				strcpy(profiles[profileNum-1].links[linkNum].link, ptr_temp);				myStrToLower((unsigned char*)profiles[profileNum-1].links[linkNum].link);			}			else			{				strcpy(profiles[profileNum].links[linkNum].link, ptr_temp);				myStrToLower((unsigned char*)profiles[profileNum].links[linkNum].link);			}			linkNum++;		}	}	fclose(fp);	return profileNum;}static double getSimilarity(PROFILE profile1, PROFILE profile2){	int i,j;	char sessionLink[256];	char profileLink[256];	int overlapNum = 0;	int max;	double sim;	int num1 = profile1.linkNum;	int num2 = profile2.linkNum;	char *ptr = NULL;	for(i=0;i<num1;i++)	{		strcpy(sessionLink, profile1.links[i].link);		myStrToLower((unsigned char*)sessionLink);		for(j=0;j<num2;j++)		{			strcpy(profileLink, profile2.links[j].link);			myStrToLower((unsigned char*)profileLink);			if(!strcmp(sessionLink, profileLink))			{				//fprintf(stdout, "session:%s\tprofile:%s\n", sessionLink, profileLink);				overlapNum++;				break;			}		}	}	//max = (num1>num2)?num1:num2;	//fprintf(stdout, "overlap string num= %d", overlapNum);	sim = (double)overlapNum/(sqrt(num1)*sqrt(num2)); // similarity = a ^ b/ a *  b	return sim;}static int compareWeight(const void* wt1, const void* wt2){	REC_URL* weight1 = (REC_URL*)wt1;	REC_URL* weight2 = (REC_URL*)wt2;	if (weight1->wt > weight2->wt)	{		return -1;	}	else if (weight1->wt < weight2->wt)	{		return 1;	}	else	{		return 0;	}	}/**this code is used to get the collaborative filtering based recommendation results * urls and recs must be preallocated. **/int getCollaborativeFilteringRecs(SESSION_LINKS links, REC_LINKS* recs){	int i,j,k;	double sim;	char *ptr = NULL;	int profileNum;	PROFILE cur_session;	REC_LINKS rec_links;	int occur_flag = 0;	int exist_flag = 0;	int linkNum = links.num;		bzero(&rec_links, sizeof(REC_LINKS));	bzero(&cur_session, sizeof(PROFILE));		profileNum = getProfiles(profile_fname);	if(profileNum < 0)	{		fprintf(stderr, "failed to read profiles!\n");		return -1;	}		/*for(i=0;i<profileNum;i++)	{		fprintf(stdout, "profile: %d\t Session: %d\t linkNum: %d\n", 			profiles[i].profileNo, profiles[i].sessionNum, profiles[i].linkNum);		for(j=0;j<profiles[i].linkNum;j++)		{			fprintf(stdout, "weight:%.2lf\t url:%s\n", profiles[i].links[j].weight, profiles[i].links[j].link);		}	}*/	cur_session.linkNum = linkNum;	for(i=0;i<linkNum;i++)	{		if(strstr(links.urls[i].url, "http://")!=NULL && strstr(links.urls[i].url, ".edu/")!=NULL)		{			ptr = strstr(links.urls[i].url, ".edu/");			strcpy(cur_session.links[i].link, ptr+4);		}		else strcpy(cur_session.links[i].link, links.urls[i].url);	}		for(i=0;i<profileNum;i++)	{		sim = getSimilarity(cur_session,profiles[i]);		if(sim > g_sim_threshold)		{			for(j=0;j<profiles[i].linkNum;j++)			{				occur_flag = 0;				for(k=0;k<cur_session.linkNum;k++) 				{					if(!strcmp(cur_session.links[k].link, profiles[i].links[j].link))					{						occur_flag = 1;						break;					}				}				if(occur_flag == 1) continue;				exist_flag = 0;				for(k=0;k<rec_links.num;k++)				{					if(!strcmp(rec_links.urls[k].url, profiles[i].links[j].link))					{						exist_flag = 1;						rec_links.urls[k].wt++;  //already existed in recommendation set, increase weight by 1						break;					}				}				if(exist_flag == 1) continue;				//never occured in current session and not existed in recommendations, add to recommendation set				if(rec_links.num >= MAX_RECS_PER_VISIT) continue;				strcpy(rec_links.urls[rec_links.num].url, profiles[i].links[j].link);				rec_links.urls[rec_links.num++].wt++;			}					}	}	qsort(rec_links.urls, rec_links.num, sizeof(REC_URL), compareWeight);	*recs = rec_links;	return rec_links.num;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -