📄 collaborative.c
字号:
/** * collaborative.c, collaborative filtering based recommendation Implementation * zhiyong zhang * louisville May 23th **/#include <stdio.h>#include <string.h>#include <strings.h>#include "recdef.h"#include "session.h"#define MAX_PROFILES 100typedef struct { double weight; char link[256]; } PROFILE_LINK;typedef struct { int profileNo; int sessionNum; int linkNum; PROFILE_LINK links[MAX_LINKS_PER_SESSION]; } PROFILE;static PROFILE profiles[MAX_PROFILES];static char profile_fname[256] = "/home/zhiyong/software/apache2/cgi-bin/profile.txt";static char profile_str[128] = "Profile";static double g_sim_threshold = 0.0;static int getProfiles(const char* fileName);static int compareWeight(const void* wt1, const void* wt2);static double getSimilarity(PROFILE profile1, PROFILE profile2);static int getProfiles(const char* fileName){ char line[1024]; char temp[1024]; char *ptr = NULL; char *ptr_temp = NULL; char *pptr = NULL; FILE* fp = fopen(fileName, "r"); int profileNum; int linkNum; if(fp == NULL) { fprintf(stderr, "failed to open profile file %s\n", fileName); return -1; } profileNum = 0; linkNum = 0; while(fgets(line, 1024, fp) != NULL) { if((ptr=strstr(line,profile_str)) != NULL) { linkNum = 0; // reset the link number //get the profile No strcpy(temp, ptr+strlen(profile_str)+1); pptr = strchr(temp, ','); if(pptr == NULL) continue; *pptr = '\0'; profiles[profileNum].profileNo = atoi(temp); //get the number of sessions for the profile ptr_temp = pptr+1; pptr = strchr(ptr_temp, '='); if(pptr == NULL) continue; ptr_temp = pptr+2; pptr = strchr(ptr_temp, ','); if(pptr == NULL) continue; *pptr = '\0'; profiles[profileNum].sessionNum = atoi(ptr_temp); //get the number of significant urls for the profile ptr_temp = pptr+1; pptr = strchr(ptr_temp, '='); ptr_temp = pptr+2; pptr = strchr(ptr_temp, ':'); if(pptr == NULL) continue; *pptr = '\0'; profiles[profileNum].linkNum = atoi(ptr_temp); profileNum++; } else if((ptr=strchr(line,'{')) != NULL) { //get the link's significance strcpy(temp, ptr+1); pptr = strchr(temp, '\t'); if(pptr == NULL) continue; *pptr = '\0'; if(profileNum > 0) { profiles[profileNum-1].links[linkNum].weight = atof(temp); //fprintf(stdout, "temp:%s, weight:%lf\n", temp, profiles[profileNum-1].links[linkNum].weight); } else { profiles[profileNum].links[linkNum].weight = atof(temp); } //get the link's url name ptr_temp = pptr+1; pptr = strchr(ptr_temp, '/'); if(pptr == NULL) continue; ptr_temp = pptr; pptr = strchr(ptr_temp, '}'); if(pptr == NULL) continue; *pptr = '\0'; if(profileNum > 0) { strcpy(profiles[profileNum-1].links[linkNum].link, ptr_temp); myStrToLower((unsigned char*)profiles[profileNum-1].links[linkNum].link); } else { strcpy(profiles[profileNum].links[linkNum].link, ptr_temp); myStrToLower((unsigned char*)profiles[profileNum].links[linkNum].link); } linkNum++; } } fclose(fp); return profileNum;}static double getSimilarity(PROFILE profile1, PROFILE profile2){ int i,j; char sessionLink[256]; char profileLink[256]; int overlapNum = 0; int max; double sim; int num1 = profile1.linkNum; int num2 = profile2.linkNum; char *ptr = NULL; for(i=0;i<num1;i++) { strcpy(sessionLink, profile1.links[i].link); myStrToLower((unsigned char*)sessionLink); for(j=0;j<num2;j++) { strcpy(profileLink, profile2.links[j].link); myStrToLower((unsigned char*)profileLink); if(!strcmp(sessionLink, profileLink)) { //fprintf(stdout, "session:%s\tprofile:%s\n", sessionLink, profileLink); overlapNum++; break; } } } //max = (num1>num2)?num1:num2; //fprintf(stdout, "overlap string num= %d", overlapNum); sim = (double)overlapNum/(sqrt(num1)*sqrt(num2)); // similarity = a ^ b/ a * b return sim;}static int compareWeight(const void* wt1, const void* wt2){ REC_URL* weight1 = (REC_URL*)wt1; REC_URL* weight2 = (REC_URL*)wt2; if (weight1->wt > weight2->wt) { return -1; } else if (weight1->wt < weight2->wt) { return 1; } else { return 0; } }/**this code is used to get the collaborative filtering based recommendation results * urls and recs must be preallocated. **/int getCollaborativeFilteringRecs(SESSION_LINKS links, REC_LINKS* recs){ int i,j,k; double sim; char *ptr = NULL; int profileNum; PROFILE cur_session; REC_LINKS rec_links; int occur_flag = 0; int exist_flag = 0; int linkNum = links.num; bzero(&rec_links, sizeof(REC_LINKS)); bzero(&cur_session, sizeof(PROFILE)); profileNum = getProfiles(profile_fname); if(profileNum < 0) { fprintf(stderr, "failed to read profiles!\n"); return -1; } /*for(i=0;i<profileNum;i++) { fprintf(stdout, "profile: %d\t Session: %d\t linkNum: %d\n", profiles[i].profileNo, profiles[i].sessionNum, profiles[i].linkNum); for(j=0;j<profiles[i].linkNum;j++) { fprintf(stdout, "weight:%.2lf\t url:%s\n", profiles[i].links[j].weight, profiles[i].links[j].link); } }*/ cur_session.linkNum = linkNum; for(i=0;i<linkNum;i++) { if(strstr(links.urls[i].url, "http://")!=NULL && strstr(links.urls[i].url, ".edu/")!=NULL) { ptr = strstr(links.urls[i].url, ".edu/"); strcpy(cur_session.links[i].link, ptr+4); } else strcpy(cur_session.links[i].link, links.urls[i].url); } for(i=0;i<profileNum;i++) { sim = getSimilarity(cur_session,profiles[i]); if(sim > g_sim_threshold) { for(j=0;j<profiles[i].linkNum;j++) { occur_flag = 0; for(k=0;k<cur_session.linkNum;k++) { if(!strcmp(cur_session.links[k].link, profiles[i].links[j].link)) { occur_flag = 1; break; } } if(occur_flag == 1) continue; exist_flag = 0; for(k=0;k<rec_links.num;k++) { if(!strcmp(rec_links.urls[k].url, profiles[i].links[j].link)) { exist_flag = 1; rec_links.urls[k].wt++; //already existed in recommendation set, increase weight by 1 break; } } if(exist_flag == 1) continue; //never occured in current session and not existed in recommendations, add to recommendation set if(rec_links.num >= MAX_RECS_PER_VISIT) continue; strcpy(rec_links.urls[rec_links.num].url, profiles[i].links[j].link); rec_links.urls[rec_links.num++].wt++; } } } qsort(rec_links.urls, rec_links.num, sizeof(REC_URL), compareWeight); *recs = rec_links; return rec_links.num;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -