📄 run.activelearning.cpp
字号:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <map>
#include <string>
using namespace std;
/* Fixed-size scratch buffers used by main():
     fname    - first the path of the corpus index file, then the message path
                taken from the second field of each index line
     judge    - label taken from the first field of each index line
     clas     - value of "class=" parsed from filter_out
     tfile    - value of "tfile=" parsed from filter_out
     buf      - the current index line, then reused for shell command strings
     res      - name of the results file
     labelReq - value of "labelReq=" parsed from filter_out ("labelN" if absent) */
char fname[200], judge[200], clas[200], tfile[200], buf[1000], res[1000], labelReq[200];
/* nf holds the field count returned by fscanf on filter_out; i, j and k are
   not referenced by main() in this file. */
int i,j,k,nf;
/* in: stream reading filter_out; out: stream writing the results file. */
FILE *in, *out;
/* Value of "score=" parsed from filter_out (reset to 0 when missing). */
double score = 0.0;
/* Maps a message path (fname) to the tfile value the filter reported for it. */
map<string,string> tmap;
/* Remaining label allowance, read from argv[4]; decremented in the training step. */
long int labelAllowance = 0;
/* True when a label allowance was supplied on the command line (argc >= 5). */
bool restrictLabeling = false;
/* Number of ham/spam lines counted in the index when labelling is restricted;
   passed to ./classify and decremented once per classified message. */
long int messages = 0;
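/*
Shell-style sketch of positional-argument defaults, kept for reference
(presumably from an earlier shell version of this driver -- TODO confirm).
Note that main() below does NOT default the corpus path: with no arguments
it prints a usage message and exits.
case $# in
3) cpath=$1 ; runid=$2 ;output=$3 ;;
2) cpath=$1 ; runid=$2 ;output=results ;;
1) cpath=$1 ; runid=none ;output=results ;;
0) cpath=`pwd`; output=results ;;
*/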
/* Abort with a message when an snprintf() result shows the output did not
   fit into a buffer of `capacity` bytes (or that formatting failed). */
static void dieIfTruncated(int written, size_t capacity, const char *what) {
    if (written < 0 || static_cast<size_t>(written) >= capacity) {
        fprintf(stderr, "%s is too long\n", what);
        exit(1);
    }
}

/*
 * Active-learning test driver.
 *
 * Usage: prog corpusdir runid resultfile [labelAllowance]
 *   argv[1]  corpus directory; "<argv[1]>/index" is read, one
 *            "<label> <path>" pair per line
 *   argv[2]  run id (accepted but not used by this function)
 *   argv[3]  results file name (default "results")
 *   argv[4]  optional label allowance; when absent labelling is unrestricted
 *
 * For every index line whose label is ham/spam/Ham/Spam the function runs
 * ./classify, parses filter_out, appends a line to the results file, and then
 * (subject to the label-request rules below) runs ./train. ./finalize is run
 * after the last line. Returns 0 on completion; exits with status 1 on a
 * usage error, an unopenable file, or an over-long path/command.
 *
 * NOTE(review): message paths from the index are interpolated into shell
 * commands unescaped; this assumes the index file is trusted.
 */
int main(int argc, char **argv) {
    /* argv[4] contains the labelAllowance -- if empty, default to no
       restrictions on label requests */
    if (argc < 5) {
        restrictLabeling = false;
    } else {
        restrictLabeling = true;
        /* strtol, not atoi: labelAllowance is a long */
        labelAllowance = strtol(argv[4], NULL, 10);
    }

    if (argc < 2) {
        printf("usage: %s corpusdir runid resultfile [labelAllowance]\n", argv[0]);
        exit(1);
    }

    /* Bounded copies: argv strings may be longer than the fixed buffers. */
    dieIfTruncated(snprintf(res, sizeof res, "%s", argc < 4 ? "results" : argv[3]),
                   sizeof res, "results file name");
    dieIfTruncated(snprintf(fname, sizeof fname, "%s/index", argv[1]),
                   sizeof fname, "index path");

    if (!freopen(fname, "r", stdin)) {
        perror(fname);
        exit(1);
    }

    if (restrictLabeling) {
        /* First pass over the index: count the messages and reject any label
           other than ham/spam. */
        char label[100];
        while (1 == scanf("%99s%*[^\n]", label)) {
            if (!strcmp(label, "ham") || !strcmp(label, "spam")) {
                messages++;
            } else {
                fprintf(stderr, "delayed feedback label '%s' in index file unsupported for active learning\n", label);
                exit(1);
            }
        }
        /* Rewind by reopening the index for the main pass. */
        if (!freopen(fname, "r", stdin)) {
            perror(fname);
            exit(1);
        }
    }

    if (!(out = fopen(res, "w"))) {
        perror("results file");
        exit(1);
    }

    /* Run the filter's initialize routine... */
    system("rm -f filter_out");
    //system("./initialize");
    //sleep(500);
    printf("done init\n");

    /* Run the filter tests... */
    while (fgets(buf, sizeof buf, stdin)) {
        /* fgets keeps the newline; strip it. If the line did not fit, drop
           the remainder so it is not parsed as a separate index line. */
        size_t len = strcspn(buf, "\n");
        if (buf[len] == '\n') {
            buf[len] = '\0';
        } else {
            int c;
            while ((c = getchar()) != EOF && c != '\n') {
            }
        }
        printf("buf %s\n", buf);

        if (2 != sscanf(buf, "%199s%199s", judge, fname)) {
            printf("bad line in index file: %s\n", buf);
            /* Skip it: judge/fname still hold the previous line's values, and
               continuing would classify and train that message again. */
            continue;
        }
        printf("judge %s fname %s end\n", judge, fname);

        /* Run the filter classify routine... */
        /* NOTE(review): "Ham"/"Spam" are accepted here, but the training step
           below only matches "ham"/"HAM"/"spam"/"SPAM" -- confirm intent. */
        if (!strcmp(judge, "ham") || !strcmp(judge, "spam") ||
            !strcmp(judge, "Ham") || !strcmp(judge, "Spam")) {
            /* Modification: ./classify is now called with the email message
               location, the remaining label allowance, and the number of
               messages left (counted only when labelling is restricted). */
            long remaining = messages--;
            dieIfTruncated(snprintf(buf, sizeof buf,
                                    "./classify %s%s %ld %ld </dev/null > filter_out",
                                    argv[1], fname, labelAllowance, remaining),
                           sizeof buf, "classify command");
            printf("doing %s\n", buf);
            system(buf);
            system("cat filter_out");

            in = fopen("filter_out", "r");
            if (!in) {
                perror("filter_out");
                exit(1);
            }
            strcpy(tfile, "");
            /* Parse filter output; field widths match the 200-byte buffers. */
            nf = fscanf(in, "class=%199s score=%lf tfile=%199s labelReq=%199s",
                        clas, &score, tfile, labelReq);
            fclose(in);
            tmap[fname] = tfile;
            printf("read field count: %d\n", nf);

            /* Defaults for fields the filter did not print. */
            if (nf < 4) strcpy(labelReq, "labelN");
            if (nf < 2) score = 0;
            if (nf < 1) strcpy(clas, "ham");

            /* Output true label, classification, score, labelRequest, and
               remaining label allowance. */
            fprintf(out, "%s judge=%s class=%s score=%0.8lf labelReq= %s labelAllowance= %ld\n",
                    fname, judge[1] == 'p' ? "spam" : "ham", clas, score,
                    labelReq, labelAllowance);
            fflush(out);
        }

        /* Train filter, unless there's a noRequest, or there's a labelN
           request with no available labelAllowance */
        if (!restrictLabeling ||
            (strcmp(labelReq, "noRequest") &&
             !(!strcmp(labelReq, "labelN") && labelAllowance <= 0))) {
            /* labelB request says if no label available, then perform
               bootstrapping by training on prediction */
            if (!strcmp(labelReq, "labelB") && labelAllowance <= 0)
                strcpy(judge, clas);

            if (!strcmp(judge, "ham") || !strcmp(judge, "HAM")) {
                dieIfTruncated(snprintf(buf, sizeof buf, "./train %s %s%s %s < /dev/null",
                                        "ham", argv[1], fname, tmap[fname].c_str()),
                               sizeof buf, "train command");
                printf("doing %s\n", buf);
                system(buf);
            }
            if (!strcmp(judge, "spam") || !strcmp(judge, "SPAM")) {
                dieIfTruncated(snprintf(buf, sizeof buf, "./train %s %s%s %s < /dev/null",
                                        "spam", argv[1], fname, tmap[fname].c_str()),
                               sizeof buf, "train command");
                printf("doing %s\n", buf);
                system(buf);
            }
            if (labelAllowance >= 0) /* protecting against underflow */
                labelAllowance--;
        }
    }

    system("./finalize");
    fclose(out);
    return 0;
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -