📄 hhed.c
字号:
float x,v1,v2,sum=0.0; m1 = (s1->spdf.cpdf+1)->mpdf; m2 = (s2->spdf.cpdf+1)->mpdf; V = VectorSize(m1->mean); for (k=1; k<=V; k++) { x = m1->mean[k] - m2->mean[k]; v1 = m1->cov.var[k]; v2 = m2->cov.var[k]; sum += x*x / sqrt(v1*v2); /* The following is closer to the true divergence but the */ /* increased sensitivity to undertrained variances is undesirable */ /* sum += v1/v2 + v2/v1 - 2.0 + (1.0/v1 + 1.0/v2)*x*x; */ } return sqrt(sum/V);}/* GDistance: return general distance between two arbitrary pdfs by summing the log probabilities of each mixture mean with respect to the other pdf */float GDistance(int s, StreamElem *s1, StreamElem *s2){ int m,M; MixtureElem *me; Observation dummy; float sum=0.0; M = s1->nMix; for (m=1,me = s1->spdf.cpdf+1; m<=M; m++, me++) { dummy.fv[s]=me->mpdf->mean; sum += SOutP(hset,s,&dummy,s2); } M = s2->nMix; for (m=1,me = s2->spdf.cpdf+1; m<=M; m++, me++) { dummy.fv[s]=me->mpdf->mean; sum += SOutP(hset,s,&dummy,s1); } return -(sum/M);}/* StateDistance: return distance between given states */float StateDistance(ILink i1, ILink i2){ StateElem *se1, *se2; StateInfo *si1, *si2; StreamElem *ste1,*ste2; float x = 0.0; int s,S; se1 = (StateElem *)i1->item; si1 = se1->info; se2 = (StateElem *)i2->item; si2 = se2->info; S = hset->swidth[0]; ste1 = si1->pdf+1; ste2 = si2->pdf+1; for (s=1;s<=S;s++,ste1++,ste2++) if (hset->hsKind == TIEDHS) x += TDistance(ste1,ste2); else if (hset->hsKind==DISCRETEHS) x += DDistance(ste1,ste2); else if (maxMixes == 1 && ste1->spdf.cpdf[1].mpdf->ckind==DIAGC && ste2->spdf.cpdf[1].mpdf->ckind==DIAGC) x += Divergence(ste1,ste2); else x += GDistance(s,ste1,ste2); return x/S;}/* SetGDist: compute inter group distances */void SetGDist(CLink *cvec, Matrix id, Matrix gd, int N){ int i,j; CLink p,q,pp; float maxd; for (i=1; i<=N; i++) gd[i][i]=0.0; for (i=1; i<N; i++) { p = cvec[i]; for (j=i+1; j<=N; j++) { q = cvec[j]; maxd = 0.0; while (q != NULL) { pp = p; while (pp != NULL) { if (id[pp->idx][q->idx] > maxd) maxd = id[pp->idx][q->idx]; pp = pp->next; } q = q->next; } gd[i][j] = gd[j][i] = maxd; } }}/* SetIDist: compute inter item distances */void SetIDist(CLink *cvec, Matrix id, int N, char type){ ILink ii,jj; int i,j; float dist=0.0; for (i=1; i<=N; i++) id[i][i]=0.0; for (i=1; i<N; i++) { ii=cvec[i]->item; for (j=i+1; j<=N; j++) { jj = cvec[j]->item; switch(type) { case 's': dist = StateDistance(ii,jj); break; default: HError(2640,"SetIDist: Cant compute distances for %c types",type); } id[i][j] = id[j][i] = dist; } }}/* MinGDist: find min inter group distance */float MinGDist(Matrix g, int *ix, int *jx, int N){ int mini,minj; float min; int i,j; min = g[1][2]; mini=1; minj=2; for (i=1; i<N; i++) for (j=i+1; j<=N; j++) if (g[i][j]<min) { min = g[i][j]; mini = i; minj = j; } *ix = mini; *jx = minj; return min;}/* MergeGroups: merge the two specified groups */void MergeGroups(int i, int j, CLink *cvec, int N){ int k; CLink p; p = cvec[i]; while(p->next != NULL) p = p->next; p->next = cvec[j]; for (k=j; k<N; k++) cvec[k] = cvec[k+1];}/* BuildCVec: allocate space for cvec and create item sized groups */CLink *BuildCVec(int numClust, ILink ilist){ CLink *cvec,p; int i; /* Allocate extra space to allow for call to Dispose(&tmpHeap,cvec) */ cvec=(CLink*) New(&tmpHeap,(numClust+1)*sizeof(CLink)); for (i=1;i<=numClust; i++) { p=(CLink) New(&tmpHeap,sizeof(CRec)); if ((p->item = ilist) == NULL) HError(2690,"BuildCVec: numClust<NumItems(ilist) %d",i); ilist = ilist->next; p->idx = i; p->next = NULL; cvec[i] = p; } if (ilist!=NULL) HError(2690,"BuildCVec: numClust>NumItems(ilist)"); return cvec;}/* SetOccSums: return a vector of cluster occupation sums. The occ count for each state was stored in the hook of the StateInfo rec by the RO command */Vector SetOccSums(CLink *cvec, int N){ int i; float sum,x; CLink p; Vector v; StateElem *se; StateInfo *si; ILink ip; v = CreateVector(&tmpHeap,N); for (i=1; i<=N; i++) { sum = 0.0; for (p=cvec[i]; p != NULL; p = p->next) { ip = p->item; se = (StateElem *)ip->item; si = se->info; memcpy(&x,&(si->hook),sizeof(float)); sum += x; } v[i] = sum; } return v;}/* UpdateOccSums: update the occSum array following MergeGroups */void UpdateOccSums(int i, int j, Vector occSum, int N){ int k; occSum[i] += occSum[j]; for (k=j; k<N; k++) occSum[k] = occSum[k+1];}/* MinOccSum: return index of min occ sum */int MinOccSum(Vector occSum, int N){ float min; int mini,i; mini = 1; min = occSum[mini]; for (i=2; i<=N; i++) if (occSum[i]<min) { mini = i; min = occSum[mini]; } return mini;}/* RemOutliers: remove any cluster for which the total state occupation count is below the 'outlierThresh' set by the RO command */void RemOutliers(CLink *cvec, Matrix idist, Matrix gdist, int *numClust, Vector occSum){ int N; /* current num clusters */ int sparsest,i,mini; Vector gd; float min; N = *numClust; sparsest = MinOccSum(occSum,N); while (N>1 && occSum[sparsest] < outlierThresh) { gd = gdist[sparsest]; /* find best merge */ mini = (sparsest==1)?2:1; min = gd[mini]; for (i=2; i<=N; i++) if (i != sparsest && gd[i]<min) { mini = i; min = gd[mini]; } MergeGroups(sparsest,mini,cvec,N); UpdateOccSums(sparsest,mini,occSum,N); --N; SetGDist(cvec,idist,gdist,N); sparsest = MinOccSum(occSum,N); } *numClust = N;}/* Clustering: split ilist into sublists where each sublist contains one cluster of items. Return list of sublists in cList. It uses a simple 'Furthest neighbour hierarchical cluster algorithm' */void Clustering(ILink ilist, int *numReq, float threshold, char type, char *macName){ Vector occSum=NULL; /* array[1..N] of cluster occupation sum */ int numClust; /* current num clusters */ CLink *cvec; /* array[1..numClust] of ->CRec */ Matrix idist; /* item distance matrix */ Matrix gdist; /* group cluster matrix */ CLink p; ILink l; float ming,min; int i,j,k,n,numItems; char buf[40]; if (badGC) { FixAllGConsts(hset); /* in case any bad gConsts around */ badGC=FALSE; } numItems = NumItems(ilist); numClust = numItems; /* each item is separate cluster initially */ if (trace & T_IND) { printf(" Start %d items\n",numClust); fflush(stdout); } cvec = BuildCVec(numClust,ilist); idist = CreateMatrix(&tmpHeap,numClust,numClust); gdist = CreateMatrix(&tmpHeap,numClust,numClust); SetIDist(cvec,idist,numClust,type); /* compute inter-item distances */ CopyMatrix(idist,gdist); /* 1 item per group so dmats same */ ming = MinGDist(gdist,&i,&j,numClust); while (numClust>*numReq && ming<threshold) { /* merge closest two groups */ MergeGroups(i,j,cvec,numClust); --numClust; SetGDist(cvec,idist,gdist,numClust); /* recompute gdist */ ming = MinGDist(gdist,&i,&j,numClust); } n = numClust; if (occStatsLoaded) { if (trace & T_IND) { printf(" Via %d items before removing outliers\n",numClust); fflush(stdout); } occSum = SetOccSums(cvec,numClust); RemOutliers(cvec,idist,gdist,&numClust,occSum); } *numReq = numClust; /* in case this is thresh limited case */ if (trace & T_IND) { printf(" End %d items\n",numClust); fflush(stdout); } for (i=1; i<=numClust; i++) { if (trace & T_CLUSTERS) { for (j=1,min=99.999,k=0;j<=numClust;j++) if (i!=j && gdist[i][j]<min) min=gdist[i][j],k=j; printf(" C.%-2d MinG %6.3f[%d]",i,min,k); if (occSum != NULL) printf(" (%.1f) ==",occSum[i]); for (p=cvec[i]; p!=NULL; p=p->next) printf(" %s",HMMPhysName(hset,p->item->owner)); printf("\n"); fflush(stdout); } sprintf(buf,"%s%d",macName,i); /* construct macro name */ for (p=cvec[i],l=NULL;p!=NULL;p=p->next) p->item->next=l,l=p->item; /* and item list */ ApplyTie(l,buf,type); /* and tie it */ FreeItems(&l); /* free items in sub list */ } Dispose(&tmpHeap,cvec);}/* ---------- Up Num Mixtures Operations --------------------- *//* SetGCStats: scan loaded models and compute average gConst */void SetGCStats(void){ HMMScanState hss; LogDouble sum = 0.0, sumsq = 0.0; float x; int count = 0; FixAllGConsts(hset); /* in case any bad gConsts around */ badGC=FALSE; NewHMMScan(hset,&hss); while(GoNextMix(&hss,FALSE)) { x = hss.me->mpdf->gConst; sum += x; sumsq += x*x; count++; } EndHMMScan(&hss); meanGC = sum / count; stdGC = sqrt(sumsq / count - meanGC*meanGC); if (trace & T_IND) { printf(" Mean GC = %f, Std Dev GC = %f\n",meanGC,stdGC); fflush(stdout); }}/* HeaviestMix: find the heaviest mixture in me, the hook field of each mix holds a count of the number of times that the mixture has been split. Each mixture is then scored as: mixture weight - num splits but if the mix gConst is less than 4 x stddev below average then the score is reduced by 5000 making it very unlikely to be selected */int HeaviestMix(char *hname, MixtureElem *me, int M){ float max,w,gThresh; int m,maxm; MixPDF *mp; gThresh = meanGC - 4.0*stdGC; maxm = 1; mp = me[1].mpdf; max = me[1].weight - (int)mp->hook; if ((int)mp->hook < 5000 && mp->gConst < gThresh) { max -= 5000.0; mp->hook = (void *)5000; HError(-2637,"HeaviestMix: mix 1 in %s has v.small gConst [%f]", hname,mp->gConst); } for (m=2; m<=M; m++) { mp = me[m].mpdf; w = me[m].weight - (int)mp->hook; if ((int)mp->hook < 5000 && mp->gConst < gThresh) { w -= 5000.0; mp->hook = (void *)5000; HError(-2637,"HeaviestMix: mix %d in %s has v.small gConst [%f]", m,hname,mp->gConst); } if (w>max) { max = w; maxm = m; } } if (me[maxm].weight<=MINMIX) HError(2697,"HeaviestMix: heaviest mix is defunct!"); if (trace & T_DET) { printf(" : Split %d (weight=%.3f, count=%d, score=%.3f)\n", maxm, me[maxm].weight, (int)me[maxm].mpdf->hook, max); fflush(stdout); } return maxm;}/* UpMix: increase number of mixes in stream from oldM to newM */void UpMix(char *hname, StreamElem *ste, int oldM, int newM){ MixtureElem *me,m1,m2; int m,count,vSize; me = (MixtureElem*) New(&hmmHeap,sizeof(MixtureElem)*newM); --me; vSize = VectorSize(ste->spdf.cpdf[1].mpdf->mean); for (m=1;m<=oldM;m++) me[m] = ste->spdf.cpdf[m]; count=oldM; while (count<newM) { m = HeaviestMix(hname,me,count); ++count; SplitMix(me+m,&m1,&m2,vSize); me[m] = m1; me[count] = m2; } ste->spdf.cpdf = me; ste->nMix = newM;}/* CountDefunctMix: return number of defunct mixtures in given stream */int CountDefunctMix(StreamElem *ste){ int m,defunct; for (m=1,defunct=0; m<=ste->nMix; m++) if (ste->spdf.cpdf[m].weight <= MINMIX) ++defunct; return defunct;}/* FixDefunctMix: restore n defunct mixtures by successive mixture splitting */void FixDefunctMix(char *hname,StreamElem *ste, int n){ MixtureElem *me,m1,m2; int m,M,l,count,vSize; me = ste->spdf.cpdf; M = ste->nMix; vSize
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -