📄 dctree.cc
字号:
#include <stdio.h>#include <stdlib.h>#include <ctype.h>#include <string.h>#include <iostream.h>#include <time.h>#include <math.h>#include "../MyLib/MyLib.h"#include "../MyLib/DC.h"struct aNode{ DC **dcObj; int isLeaf,numChild; struct aNode **child;};typedef struct aNode node;FILE *topicFile;int numDoc,minChild,maxChild,numFeat,numTopic,t3;int *N3,*T,height;char **feature,**topic;double simThres,t1,t2,*F;double sim0(DC *dc1,DC *dc2){ double s,w1,w2,prod,norm1,norm2; prod = 0.0; norm1 = 0.0; norm2 = 0.0; for(int i=0;i<numFeat;i++){ w1 = ((double) dc1->W[i])/((double) dc1->N); w2 = ((double) dc2->W[i])/((double) dc2->N); prod = prod+(w1*w2); norm1 = norm1+(w1*w1); norm2 = norm2+(w2*w2); } s = prod/sqrt(norm1*norm2); return s;}double sim2(DC *dc1,DC *dc2){ double s,dist,w1,w2; dist = 0.0; for(int i=0;i<numFeat;i++){ w1 = ((double) dc1->W[i])/((double) dc1->N); w2 = ((double) dc2->W[i])/((double) dc2->N); dist = dist + fabs(w1-w2); } s = 1.0-(dist/((double) numFeat)); return s;}double sim1(DC *dc1,DC *dc2){ double s,dist,w1,w2,diff; dist = 0.0; for(int i=0;i<numFeat;i++){ w1 = ((double) dc1->W[i])/((double) dc1->N); w2 = ((double) dc2->W[i])/((double) dc2->N); diff = w1-w2; dist = dist+(diff*diff); } s = 1.0-(sqrt(dist)/((double) numFeat)); return s;}double closest(node *node,DC *dc,int *tEnt){ double s,maxSim; maxSim = -1.0; *tEnt = 0; for(int i=0;i<node->numChild;i++){ s = sim2(node->dcObj[i],dc); if(maxSim < s){ *tEnt = i; maxSim = s; } } return maxSim;}node *splitNode(node **root,node *retNode){ int nc1,nc2,NC1,NC2; node *newNode; nc1 = (maxChild+1)/2; nc2 = maxChild - nc1 + 1; /* find the first elements of two groups */ DC *tmpDC; double s,minSim; int seed1,seed2; seed1 = 0; seed2 = 1; minSim = 1.1; tmpDC = new DC(-1,NULL,numFeat,NULL); for(int i=0;i<retNode->numChild;i++){ tmpDC->merge(retNode->dcObj[i]); } for(int i=0;i<maxChild;i++){ for(int j=i+1;j<=maxChild;j++){ if(j < maxChild){ s = sim2((* root)->dcObj[i],(* root)->dcObj[j]); }else{ s = sim2((* root)->dcObj[i],tmpDC); } if(s < minSim){ seed1 = i; seed2 = j; minSim = s; }else if(s == minSim && ((i+j)%2 == 1)){ seed1 = i; seed2 = j; } } } /* assign the first elements to the two groups */ node **group1,**group2; DC **gDC1,**gDC2; int M; M = maxChild-minChild+1; group1 = new (node *)[M]; gDC1 = new (DC *)[M]; if(seed1 < maxChild){ group1[0] = (* root)->child[seed1];// (* root)->child[seed1] = NULL; gDC1[0] = (* root)->dcObj[seed1];// (* root)->dcObj[seed1] = NULL; }else{ group1[0] = retNode; gDC1[0] = tmpDC; } NC1 = 1; group2 = new (node *)[M]; gDC2 = new (DC *)[M]; if(seed2 < maxChild){ group2[0] = (* root)->child[seed2];// (* root)->child[seed2] = NULL; gDC2[0] = (* root)->dcObj[seed2];// (* root)->dcObj[seed2] = NULL; }else{ group2[0] = retNode; gDC2[0] = tmpDC; } NC2 = 1; /* assign rest entries to the groups */ int assQ[maxChild-1],grpQ[maxChild+1],k; double simQ[maxChild+1],tmpSim; grpQ[seed1] = 0; simQ[seed1] = 0.0; grpQ[seed2] = 0; simQ[seed2] = 0.0; k = 0; for(int i=0;i<=maxChild;i++){ if(i != seed1 && i != seed2){ if(i < maxChild){ if(seed1 < maxChild){ tmpSim = sim2((* root)->dcObj[i],(* root)->dcObj[seed1]); }else{ tmpSim = sim2((* root)->dcObj[i],tmpDC); } grpQ[i] = 2; if(seed2 < maxChild){ simQ[i] = sim2((* root)->dcObj[i],(* root)->dcObj[seed2]); }else{ simQ[i] = sim2((* root)->dcObj[i],tmpDC); } if(tmpSim > simQ[i]){ simQ[i] = tmpSim; grpQ[i] = 1; } int j=0; while(j<k){ if(simQ[assQ[j]] < simQ[i]){ for(int l=k;l>j;l--){ assQ[l] = assQ[l-1]; } assQ[j] = i; j = k+1; }else{ j++; } } if(j == k){ assQ[k] = i; } }else{ tmpSim = sim2(tmpDC,(* root)->dcObj[seed1]); grpQ[i] = 2; simQ[i] = sim2(tmpDC,(* root)->dcObj[seed2]); if(tmpSim > simQ[i]){ simQ[i] = tmpSim; grpQ[i] = 1; } int j=0; while(j<k){ if(simQ[assQ[j]] < simQ[i]){ for(int l=k;l>j;l--){ assQ[l] = assQ[l-1]; } assQ[j] = i; j = k+1; }else{ j++; } } if(j == k){ assQ[k] = i; } } k++; } } int r = 0; while(NC1 < M && NC2 < M && r < maxChild-1){ if(grpQ[assQ[r]] == 1){ if(assQ[r] < maxChild){ group1[NC1] = (* root)->child[assQ[r]]; gDC1[NC1] = (* root)->dcObj[assQ[r]]; }else{ group1[NC1] = retNode; gDC1[NC1] = tmpDC; } NC1++; }else{ if(assQ[r] < maxChild){ group2[NC2] = (* root)->child[assQ[r]]; gDC2[NC2] = (* root)->dcObj[assQ[r]]; }else{ group2[NC2] = retNode; gDC2[NC2] = tmpDC; } NC2++; } r++; } while(r < maxChild-1){ if(NC2 == M){ if(assQ[r] < maxChild){ group1[NC1] = (* root)->child[assQ[r]]; gDC1[NC1] = (* root)->dcObj[assQ[r]]; }else{ group1[NC1] = retNode; gDC1[NC1] = tmpDC; } NC1++; }else{ if(assQ[r] < maxChild){ group2[NC2] = (* root)->child[assQ[r]]; gDC2[NC2] = (* root)->dcObj[assQ[r]]; }else{ group2[NC2] = retNode; gDC2[NC2] = tmpDC; } NC2++; } r++; }/*printf("tmpDC: "); dNode *dt; dt = tmpDC->docList; while(dt != NULL){ printf("%d ",dt->ID); dt = dt->next; } printf("\n");for(int i=0;i<maxChild-1;i++){ printf("%d ",assQ[i]);} printf("\n");for(int i=0;i<=maxChild;i++){ printf("%d ",grpQ[i]);} printf("\n");printf("group 1: seed = %d",seed1);for(int i=0;i<NC1;i++){ dNode *dl; dl = gDC1[i]->docList; printf("dc%d( ",i); while(dl != NULL){ printf("%d ",dl->ID); dl = dl->next; } printf(") ");}printf("\n");printf("group 2: seed = %d",seed2);for(int i=0;i<NC2;i++){ dNode *dl; dl = gDC2[i]->docList; printf("dc%d( ",i); while(dl != NULL){ printf("%d ",dl->ID); dl = dl->next; } printf(") ");}printf("\n");*/ newNode = (node *)malloc(sizeof(node)); newNode->isLeaf = (* root)->isLeaf; newNode->numChild = NC2; newNode->child = new (node *)[maxChild]; newNode->dcObj = new (DC *)[maxChild]; for(int i=0;i<NC2;i++){ newNode->child[i] = group2[i]; newNode->dcObj[i] = gDC2[i]; } for(int i=NC2;i<maxChild;i++){ newNode->child[i] = NULL; newNode->dcObj[i] = NULL; } (* root)->numChild = NC1; for(int i=0;i<NC1;i++){ (* root)->child[i] = group1[i]; (* root)->dcObj[i] = gDC1[i]; } for(int i=NC1;i<maxChild;i++){ (* root)->child[i] = NULL; (* root)->dcObj[i] = NULL; } return newNode;}/*node *splitLeaf(node **root,DC *ent){ int nc1,nc2; nc1 = (maxChild+1)/2; nc2 = maxChild - nc1 + 1; node *newNode; newNode = (node *)malloc(sizeof(node)); newNode->isLeaf = (* root)->isLeaf; newNode->numChild = nc2; newNode->child = new (node *)[maxChild]; newNode->dcObj = new (DC *)[maxChild]; for(int i=0;i<maxChild;i++){ newNode->child[i] = NULL; newNode->dcObj[i] = NULL; } newNode->dcObj[0] = ent; for(int i=0;i<nc2-1;i++){ newNode->dcObj[i+1] = (* root)->dcObj[nc1+i]; (* root)->dcObj[nc1+i] = NULL; } (* root)->numChild = nc1; return newNode; }*/node *insert(node **root,DC *ent){ double sim; int targetEnt; sim = closest((* root),ent,&targetEnt); if((* root)->isLeaf == 1){ if(sim >= simThres){ (* root)->dcObj[targetEnt]->merge(ent); return NULL; }else if((* root)->numChild < maxChild){ (* root)->dcObj[(* root)->numChild] = ent; ((* root)->numChild)++; return NULL; }else{ /* splitting the leaf node */ node *tmpNode; tmpNode = (node *)malloc(sizeof(node)); tmpNode->isLeaf = 1; tmpNode->numChild = 1; tmpNode->child = new (node *)[maxChild]; tmpNode->dcObj = new (DC *)[maxChild]; for(int i=0;i<maxChild;i++){ tmpNode->child[i] = NULL; tmpNode->dcObj[i] = NULL; } tmpNode->dcObj[0] = ent; return splitNode(root,tmpNode); } }else{ node *retNode; if(sim >= t1){ (* root)->dcObj[targetEnt]->merge(ent); retNode = insert(&((* root)->child[targetEnt]),ent); }else if((* root)->numChild < maxChild){ node *tmpNode; tmpNode = (node *)malloc(sizeof(node)); tmpNode->isLeaf = 1; tmpNode->numChild = 1; tmpNode->child = new (node *)[maxChild]; tmpNode->dcObj = new (DC *)[maxChild]; for(int i=0;i<maxChild;i++){ tmpNode->child[i] = NULL; tmpNode->dcObj[i] = NULL; } tmpNode->dcObj[0] = ent; (* root)->child[(* root)->numChild] = tmpNode; (* root)->dcObj[(* root)->numChild] = new DC(-1,NULL,numFeat,NULL);; (* root)->dcObj[(* root)->numChild]->merge(ent); ((* root)->numChild)++; retNode = NULL; }else{ retNode = (node *)malloc(sizeof(node)); retNode->isLeaf = 1; retNode->numChild = 1;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -