📄 dctree.cc
字号:
retNode->child = new (node *)[maxChild]; retNode->dcObj = new (DC *)[maxChild]; for(int i=0;i<maxChild;i++){ retNode->child[i] = NULL; retNode->dcObj[i] = NULL; } retNode->dcObj[0] = ent; } if(retNode != NULL){ (* root)->dcObj[targetEnt] = new DC(-1,NULL,numFeat,NULL); for(int i=0;i<(* root)->child[targetEnt]->numChild;i++){ (* root)->dcObj[targetEnt]->merge((* root)->child[targetEnt]->dcObj[i]); } /* there is splitting in the child node */ int nc; nc = (* root)->numChild; if(nc < maxChild){ (* root)->child[nc] = retNode; (* root)->dcObj[nc] = new DC(-1,NULL,numFeat,NULL); for(int i=0;i<retNode->numChild;i++){ (* root)->dcObj[nc]->merge(retNode->dcObj[i]); } ((* root)->numChild)++; (* root)->isLeaf = 0; return NULL; }else{ /* splitting the nonleaf node */ return splitNode(root,retNode); } }else{ return NULL; } }}int showTree(node *root,int lvl,int *pc,int *ncl){ int cover,N1,N2; double tmpF,P,R; if(lvl > height){ height = lvl; } if(root->isLeaf != 1){ int nd=0; for(int i=0;i<root->numChild;i++){ nd = nd + root->dcObj[i]->N; } for(int j=0;j<lvl;j++){ printf(" "); } printf("level=%d numChild=%d numDoc=%d\n",lvl,root->numChild,nd); cover = 0; for(int i=0;i<root->numChild;i++){ int p,maxp,maxTopic; double stat,maxStat; for(int j=0;j<lvl;j++){ printf(" "); } printf("l%d child %d: numDoc=%d\n",lvl,i,root->dcObj[i]->N); for(int j=0;j<lvl;j++){ printf(" "); } printf("[ "); int flag=0; for(int j=0;j<numFeat;j++){ double df; df = ((double) root->dcObj[i]->W[j])/((double) root->dcObj[i]->N); if(df > t2){ printf("%s:%.2lf ",feature[j],df); flag = 1; } } printf("]\n"); maxStat = -1.0; maxTopic = 0; for(int j=0;j<numTopic;j++){ p = root->dcObj[i]->showStat(topic[j]); stat = ((double) p)/((double) root->dcObj[i]->N); if(stat > maxStat){ maxStat = stat; maxTopic = j; maxp = p; } } if(flag == 1 && (root->dcObj[i]->N) > t3){ N1 = maxp; N2 = root->dcObj[i]->N; P = maxStat; R = ((double) N1)/((double) N3[maxTopic]); tmpF = (2.0*P*R)/(P+R); if(tmpF > F[maxTopic]){ F[maxTopic] = tmpF; T[maxTopic] = maxp; } for(int j=0;j<lvl;j++){ printf(" "); } printf("[ %s:%.2lf ]\n",topic[maxTopic],maxStat);/* for(int j=0;j<lvl;j++){ printf(" "); } printf("[ "); for(int j=0;j<numFeat;j++){ double df; df = ((double) root->dcObj[i]->W[j])/((double) root->dcObj[i]->N); if(df > t2){ printf("%s:%.2lf ",feature[j],df); } } printf("]\n");*/ cover = cover + root->dcObj[i]->N; *pc = *pc + maxp; (*ncl)++; }else if(root->dcObj[i]->N > t3){ cover = cover + showTree(root->child[i],lvl+1,pc,ncl); }else{ cover = 0; } } }else{ int nd=0; for(int i=0;i<root->numChild;i++){ nd = nd + root->dcObj[i]->N; } for(int j=0;j<lvl;j++){ printf(" "); } printf("level=%d numChild=%d numDoc=%d\n",lvl,root->numChild,nd); cover = 0; for(int i=0;i<root->numChild;i++){ int p,maxp,maxTopic; double stat,maxStat; for(int j=0;j<lvl;j++){ printf(" "); } printf("[ "); int flag=0; for(int j=0;j<numFeat;j++){ double df; df = ((double) root->dcObj[i]->W[j])/((double) root->dcObj[i]->N); if(df > t2){ printf("%s:%.2lf ",feature[j],df); flag = 1; } } printf("]\n"); maxStat = -1.0; maxTopic = 0; for(int j=0;j<lvl;j++){ printf(" "); } printf("DC %d: numDoc=%d\n",i,root->dcObj[i]->N); for(int j=0;j<numTopic;j++){ p = root->dcObj[i]->showStat(topic[j]); stat = ((double) p)/((double) root->dcObj[i]->N); if(stat > maxStat){ maxStat = stat; maxTopic = j; maxp = p; } } if(flag == 1 && root->dcObj[i]->N > t3){ N1 = maxp; N2 = root->dcObj[i]->N; P = maxStat; R = ((double) N1)/((double) N3[maxTopic]); tmpF = (2.0*P*R)/(P+R); if(tmpF > F[maxTopic]){ F[maxTopic] = tmpF; } for(int j=0;j<lvl;j++){ printf(" "); } printf("[ %s:%.2lf ]\n",topic[maxTopic],maxStat); cover = cover + root->dcObj[i]->N; *pc = *pc + maxp; (*ncl)++;/* for(int j=0;j<lvl;j++){ printf(" "); } printf("[ "); for(int j=0;j<numFeat;j++){ double df; df = ((double) root->dcObj[i]->W[j])/((double) root->dcObj[i]->N); if(df > t2){ printf("%s:%.2lf ",feature[j],df); } } printf("]\n");*/ } dNode *dl; dl = root->dcObj[i]->docList; while(dl != NULL){ for(int j=0;j<lvl+1;j++){ printf(" "); } printf("Doc %d %s\n",dl->ID,dl->label); dl = dl->next; } } } return cover;}int printTree(node *root,int lvl,int *h){ int numClust; if(root->isLeaf != 1){ printf("level=%d numChild=%d\n",lvl,root->numChild); for(int i=0;i<root->numChild;i++){ if(root->dcObj[i]->N >= 20 && root->dcObj[i]->N <=100){ printf("child %d: numDoc=%d\n",i,root->dcObj[i]->N); if(lvl > 0){ for(int j=0;j<numFeat;j++){ double t; t = ((double) root->dcObj[i]->W[j])/((double) root->dcObj[i]->N); if(t > 0.65){ printf("%s:%d ",feature[j],root->dcObj[i]->W[j]); } } printf("\n"); } } } numClust = 0; for(int i=0;i<root->numChild;i++){ numClust = numClust+printTree(root->child[i],lvl+1,h); } }else{// printf("level=%d numChild=%d\n",lvl,root->numChild); *h = lvl; numClust = root->numChild;/* for(int i=0;i<root->numChild;i++){ printf("DC %d: numDoc=%d\n",i,root->dcObj[i]->N); dNode *dl; dl = root->dcObj[i]->docList; while(dl != NULL){ printf(" Doc %d label=%s\n",dl->ID,dl->label); dl = dl->next; } }*/ } return numClust;}int main(int argc,char **argv){ FILE *input,*featFile; clock_t e_start,e_end,e_diff; double e_time; int coverage,pCover,numClust; // time_t r_start,r_end,r_time; /* checking input format */ if(argc!=8){ cerr << "Usage: " << argv[0] << " inputFile minChild maxChild simThreshold t1 t2 t3\n"; exit(1); } /* open input and output file */ if((input=fopen(argv[1],"r")) == NULL){ cerr << "Cannot open the input file: " << argv[1] << endl; exit(1); } if((featFile=fopen("./feature","r")) == NULL){ cerr << "Cannot open the feature file: feature\n"; exit(1); } if((topicFile=fopen("./topic","r")) == NULL){ cerr << "Cannot open the topic file: topic\n"; exit(1); } /* handle input parameters */ fscanf(input,"%d %d\n",&numDoc,&numFeat); fscanf(topicFile,"%d\n",&numTopic); minChild = atoi(argv[2]); maxChild = atoi(argv[3]); simThres = atof(argv[4]); t1 = atof(argv[5]); t2 = atof(argv[6]); t3 = atoi(argv[7]); node *dctree,*retNode; DC *newDC,*dc[numDoc]; char label[MaxLabelLen]; int featVect[numFeat]; topic = new (char *)[numTopic]; for(int i=0;i<numTopic;i++){ topic[i] = new char[MaxLabelLen]; fscanf(topicFile,"%s",topic[i]); } feature = new (char *)[numFeat]; for(int i=0;i<numFeat;i++){ feature[i] = new char[MaxLabelLen]; fscanf(featFile,"%s",feature[i]); } N3 = new int[numTopic]; T = new int[numTopic]; F = new double[numTopic]; for(int i=0;i<numTopic;i++){ N3[i] = 0; T[i] = 0; F[i] = 0.0; } /* initilaize the root node of the tree */ dctree = (node *)malloc(sizeof(node)); dctree->isLeaf = 1; dctree->numChild = 0; dctree->child = new (node *)[maxChild]; dctree->dcObj = new (DC *)[maxChild]; for(int i=0;i<maxChild;i++){ dctree->child[i] = NULL; dctree->dcObj[i] = NULL; } e_start = clock(); for(int i=0;i<numDoc;i++){ /* get data from the input file */ for(int j=0;j<numFeat;j++){ fscanf(input,"%d",&featVect[j]); } fscanf(input,"%s",label); newDC = new DC(i,featVect,numFeat,label); for(int j=0;j<numTopic;j++){ if(strcmp(topic[j],label) == 0){ (N3[j])++; } } retNode = insert(&dctree,newDC); /* split the root node */ if(retNode != NULL){ node *newRoot; newRoot = (node *)malloc(sizeof(node)); newRoot->isLeaf = 0; newRoot->numChild = 2; newRoot->child = new (node *)[maxChild]; newRoot->dcObj = new (DC *)[maxChild]; for(int j=0;j<maxChild;j++){ newRoot->child[j] = NULL; newRoot->dcObj[j] = NULL; } newRoot->child[0] = dctree; newRoot->dcObj[0] = new DC(-1,NULL,numFeat,NULL); for(int j=0;j<dctree->numChild;j++){ newRoot->dcObj[0]->merge(dctree->dcObj[j]); } newRoot->child[1] = retNode; newRoot->dcObj[1] = new DC(-1,NULL,numFeat,NULL); for(int j=0;j<retNode->numChild;j++){ newRoot->dcObj[1]->merge(retNode->dcObj[j]); } dctree = newRoot; } } e_end = clock(); e_diff = e_end - e_start; e_time = ((double) e_diff)/((double) CLOCKS_PER_SEC); pCover = 0; numClust = 0; height = 0; coverage = showTree(dctree,0,&pCover,&numClust); printf("Time is %.2lf sec\n",e_time); printf("Height is %d\n",height); printf("numCluster is %d\n",numClust); printf("coverage is %d\n",coverage); printf("p_coverage is %d\n",pCover); int B=0; double A =0.0; for(int i=0;i<numTopic;i++){ A = A + ((double) N3[i])*(F[i]); B = B + N3[i]; } printf("Overall F-measure is %.4lf\n",A/((double) B)); fclose(input); return 1;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -