⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 run.activelearning.cpp

📁 评测一个垃圾邮件过滤器的性能,VC环境下编写
💻 CPP
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <map>
#include <string>
using namespace std;

char fname[200], judge[200], clas[200], tfile[200], buf[1000], res[1000], labelReq[200];
int i,j,k,nf;
FILE *in, *out;
double score = 0.0;

map<string,string> tmap;

long int labelAllowance = 0;
bool restrictLabeling = false;
long int messages = 0;


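/*
   Argument defaults, written as a shell `case` on the argument count.
   This snippet appears to be carried over from a shell version of this
   driver and is kept for reference only.  Note that main() below does NOT
   default the corpus path to `pwd`: with no arguments it prints a usage
   message and exits.

case $# in
3) cpath=$1 ; runid=$2 ;output=$3 ;;
2) cpath=$1 ; runid=$2 ;output=results ;;
1) cpath=$1 ; runid=none ;output=results ;;
0) cpath=`pwd`;  output=results ;;
*/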

int main(int argc, char **argv){


  /* argv[4] contains the labelAllowance -- if empty, default to no restrictions
     on label requsts */

   if (argc < 5) {
     restrictLabeling = 0;
   } else {
     restrictLabeling = 1;
     labelAllowance = atoi(argv[4]);
   }

   if (argc < 4) 
		strcpy(res,"results");
   else 
		strcpy(res,argv[3]);

   if (argc >= 2) {
      sprintf(fname,"%s/index",argv[1]);
      if (!freopen(fname,"r",stdin)) {
         perror(fname);
         exit(1);
      }
      if (restrictLabeling) {
         int i; 
		 char label[100];
         while (1 == scanf("%s%*[^\n]",label)) {
            if (!strcmp(label,"ham") || !strcmp(label,"spam")) messages++;
            else {
               fprintf(stderr,"delayed feedback label '%s' in index file unsupported for active learning\n",label);
               exit(1);
            }
         }
         if (!freopen(fname,"r",stdin)) {
            perror(fname);
            exit(1);
         }
      }
   } else {
      printf("usage: %s corpusdir runid resultfile [labelAllowance]\n",argv[0]);
      exit(1);
   }
   if (!(out = fopen(res,"w"))) {
      perror("results file");
      exit(1);
   }

   /* Run the filter's initialize routine... */

   system("rm -f filter_out");
   //system("./initialize");
   //sleep(500);
   printf("done init\n");


   /* Run the filter tests... */

   while (gets(buf)) {
      printf("buf %s\n",buf);
      if (2 != sscanf(buf,"%s%s",&judge,&fname)) {
         printf("bad line in index file: %s\n",buf);
      }

      printf("judge %s fname %s end\n",judge,fname);


      /* Run the filter classify routine... */

      if (!strcmp(judge,"ham") || !strcmp(judge,"spam") ||
          !strcmp(judge,"Ham") || !strcmp(judge,"Spam")) {

	/* Modification: ./classify is now called with two parameters: 
	   email message location, and size of remaining label allowance. */  
	  
         sprintf(buf,"./classify %s%s %i %i </dev/null > filter_out",argv[1],fname,labelAllowance,messages--);
         printf("doing %s\n",buf);
         system(buf);
         system("cat filter_out");
         in = fopen("filter_out","r");
         if (!in) {
            perror("filter_out");
            exit(1);
         }
         strcpy(tfile,"");

	 
	 /* Parse filter output */

         nf = fscanf(in,"class=%s score=%lf tfile=%s labelReq=%s",clas,&score,tfile,labelReq);
         tmap[fname] = tfile;
		 printf("read field count: %d\n", nf);
	 if (nf < 4) strcpy(labelReq, "labelN");  
         if (nf < 2) score = 0;
         if (nf < 1) strcpy(clas,"ham");


	 /* Output true label, classification, score, labelRequest, and remaining label allowance. */
         fprintf(out,"%s judge=%s class=%s score=%0.8lf labelReq= %s labelAllowance= %i\n",fname,
                  judge[1]=='p'?"spam":"ham",clas,score,labelReq,labelAllowance);


         fflush(out);
         fclose(in);
      }

      /* Train Filter, unlesss there's a noRequest, or there's a labelN
	 request with no available labelAllowance */

      if (!restrictLabeling || 
	  (strcmp(labelReq,"noRequest") && !(!strcmp(labelReq,"labelN") && labelAllowance <= 0))) {
	
	/* labelB request says if no label available, then perform bootstrapping
	   by training on prediction */
	if (!strcmp(labelReq,"labelB") && labelAllowance <= 0)
	  strcpy(judge,clas);

	if (!strcmp(judge,"ham") || !strcmp(judge,"HAM")){
	  sprintf(buf,"./train %s %s%s %s < /dev/null","ham", argv[1],fname,tmap[fname].c_str());
	  printf("doing %s\n",buf);
	  system(buf);
	}

	if (!strcmp(judge,"spam") || !strcmp(judge,"SPAM")){
	  sprintf(buf,"./train %s %s%s %s < /dev/null","spam", argv[1],fname,tmap[fname].c_str());
	  printf("doing %s\n",buf);
	  system(buf);
	}
	
	if (labelAllowance >= 0)    /* protecting against underflow */ 
	  labelAllowance--;
	
      }
   }
   system("./finalize");
   return 0;
}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -