meme.c

来自「EM算法的改进」· C语言 代码 · 共 513 行 · 第 1/2 页

C
513
字号
    /* Do the reduction. */    reduce_across_models(model, dataset->alength);    /*fprintf(stderr, "%d: past reduction\n", mpMyID()); fflush(stderr);*/#endif /* PARALLEL */    /* quit if model has too few sites */    if (model->nsites_dis < MINSITES) break;    /*fprintf(stderr, "%d: past few sites\n", mpMyID()); fflush(stderr);*/    /* quit if model fails E-value test */    if (model->logev > log(dataset->evt)) break;    /*fprintf(stderr, "%d: past E-value\n", mpMyID()); fflush(stderr);*/    /* Store the total number of EM's in the model. */    model->iter = iter;    /* ERASE the site and starts */    /*fprintf(stderr, "%d: at erase\n", mpMyID()); fflush(stderr);*/    erase(dataset, model);    /*fprintf(stderr, "%d: past erase\n", mpMyID()); fflush(stderr);*/    /* calculate negative model by doing one EM iteration on negative ex's */    if (neg_model) {      /* copy motif model to negative model */      copy_model(model, neg_model, dataset->alength);      /* get negative model */      neg_dataset->maxiter = 1;			/* 1 iteration of em */      em(neg_model, neg_dataset);      /* ERASE the site and starts */      erase(neg_dataset, neg_model);    }    /* print results */    /*fprintf(stderr, "%d: at print results\n", mpMyID()); fflush(stderr);*/    if (!no_print) print_results(dataset, neg_dataset, model, neg_model,      candidates);    /*fprintf(stderr, "%d: past print results\n", mpMyID()); fflush(stderr);*/    /* stop if out of time */    if (dataset->max_time && imotif<nmotifs) { 	/* considering another motif */      stop_time = myclock()/1E6;		/* current time */#ifdef PARALLEL      /* use stop_time from process 0 to avoid it stopping while others continue*/      /*fprintf(stderr, "%d: time broadcast size %d\n", mpMyID(),sizeof(stop_time)); fflush(stderr);	  */      mpBroadcast((void *)&stop_time, sizeof(stop_time), 0);      /*fprintf(stderr, "%d: past time broadcast\n", mpMyID()); fflush(stderr); */#endif      /* record time if this is motif 1 */      if (imotif == 1) m1time = stop_time;      if ((dataset->max_time - stop_time) < m1time) {        ++imotif;				/* this motif OK */        break;      }    } /* check time */        /*fprintf(stderr, "%d: past check time\n", mpMyID()); fflush(stderr); */  } /* nmotifs loop */   --imotif;				/* number of motifs found */  /* print the motif block diagrams using all the motifs */  if (!no_print && imotif)      print_summary(model, dataset, los, imotif, pv, stdout);  /* print reason for stopping */  printf("\n"); PSTARS;  if (n_starts == 0) {    printf("Stopped because couldn't find any more starting points for EM.\n");  } else if (imotif == nmotifs) {    printf("Stopped because nmotifs = %d reached.\n", nmotifs);  } else if (dataset->max_time && (dataset->max_time - stop_time) < m1time) {    printf("Stopped because would probably run out of time (%.2f secs).\n",       dataset->max_time);  } else if (model->nsites_dis < MINSITES) {    printf("Stopped because next motif has fewer than %d sites.\n",      MINSITES);  } else {    printf("Stopped because motif E-value > %8.2e.\n", dataset->evt);  }  PSTARS;  fflush(stdout);#ifdef UNIX  if (!no_print) system("echo ''; echo CPU: `hostname`; echo '' ");#else  if (!no_print) printf("\nCPU: unknown\n\n");#endif /* UNIX */  PSTARS;  if (!NO_STATUS) fprintf(stderr, "\n");#ifdef PARALLEL   mpFinalize();#endif  return(0);} /* main *//**********************************************************************//*	erase        For all models:	  Reset the weights of the letters to probabilisticaly "erase"	  the ones which occur in sites already found.*//**********************************************************************/static void erase(  DATASET *dataset,			/* the dataset */  MODEL *model	 			/* the model */){  int i, j, k;  int n_samples = dataset->n_samples;		/* number of sequences */  SAMPLE **samples = dataset->samples;		/* the sequences */  int w = model->w;				/* width of motif */  /*     Set z from the maxima stored in the learned model.  */    /*fprintf(stderr, "%d: at set_z\n", mpMyID()); fflush(stderr); */  set_z(model, dataset);    /*fprintf(stderr, "%d: past set_z\n", mpMyID()); fflush(stderr); */  /*      z_ij is taken as the probability of a site occurring at i,j.     The probability of a position being in a site is taken     as the maximum of the z_ij for sites containing (overlapping) it.     w_ij is set to 1-max(z_ij) times its previous value which     reflects the independence assumption among motifs.  */  for (i=0; i<n_samples; i++) 		{	/* sequence */    double *weights = samples[i]->weights;	/* w_ij */    int lseq = samples[i]->length;		/* seq length */    double *zi = samples[i]->z;			/* z_ij */    /*if (mpMyID()==0)fprintf(stderr, "0: i loop %d lseq %d\n", i, lseq); fflush(stderr); */    if (lseq < w) continue;			/* sample too short for motif */    for (j=0; j<lseq; j++) {			/* position */      double max_z = 0.0;      /*if (mpMyID()==0)fprintf(stderr, "0: j loop %d\n", j); fflush(stderr); */      /* find largest probability that site overlaps this position */      for (k=MAX(0,j-w+1); k<=j && k<lseq-w+1; k++) {        /*if (mpMyID()==0)fprintf(stderr, "0: k loop %d\n", k); fflush(stderr);		*/	max_z = MAX(max_z, zi[k]);      }      max_z = MIN(1.0, max_z);			/* fix roundoff errors */      /* update the probability that position not in a site */      weights[j] *= 1.0 - max_z;    }  }  if (PRINT_W) print_wij(dataset);} /* erase *//**********************************************************************//*	init_model	Initialize a model from a starting point.	Returns false if starting point was not valid.*//**********************************************************************/static BOOLEAN init_model(  S_POINT *s_point,			/* the starting point */  MODEL *model,				/* the model to intialize */  DATASET *dataset,			/* the dataset */  int imotif				/* motif number */){  int w0;     /* skip if no good starting points found for w0, nsites0 */  if (s_point->score == LITTLE) { return FALSE; }  /* initialize the new motif */  strcpy(model->cons0, s_point->cons0);  w0 = model->w = model->pw = s_point->w0;  init_theta(model->theta, s_point->e_cons0, w0, dataset->map,dataset->alength);  /* initialize lambda */  model->lambda = MIN(s_point->nsites0/wps(dataset, w0), 1);  model->pal = dataset->pal;  /* initialize prior estimate of number of sites */  model->psites = s_point->nsites0;  if (PRINTALL) {    printf("component %2d: lambda= %8.6f ps= %8.0f\n",       1, model->lambda, wps(dataset, w0));    print_theta(0, 2, model->nsites_dis, model->theta, model->w, 0, "",       dataset, stdout);  }  /* init motif number */  model->imotif = imotif;  model->iseq = s_point->iseq;  model->ioff = s_point->ioff;  return TRUE;} /* init_model *//**********************************************************************//*	save_candidate	Save the starting point and part of model if it is	most significant model of its width so far:		model->sig is smallest	Returns true if the model is the best so far among all widths.*/ /**********************************************************************/static BOOLEAN save_candidate(  MODEL *model,				/* final model */  DATASET *dataset,			/* the dataset */  S_POINT *s_point,			/* starting point */  CANDIDATE *candidates,		/* candidate for best model of width */  double best_sig			/* best motif significance */){  int w = model->w;			/* final motif w */  /* objective function value */  double sig = dataset->objfun==Pv ? model->logpv : model->logev;  /* print the results for this w0, nsites0 and THETA */   if (PRINT_STARTS) {     printf("\n(start) %3d %6.1f %.*s --> %s ",       s_point->w0, s_point->nsites0, s_point->w0, s_point->cons0, model->cons);    printf("w %3d nsites %4d sig %20.10g\n\n", w, model->nsites_dis, exp(sig));    fflush(stdout);  }   /* save the results if best so far for this width */   if (sig < candidates[w].sig) {    candidates[w].s_point = s_point;    candidates[w].w = w;    candidates[w].pal = model->pal;    candidates[w].invcomp = model->invcomp;    candidates[w].lambda = model->lambda;    strcpy(candidates[w].cons, model->cons);    candidates[w].rel = model->rel;    candidates[w].ll = model->ll;    candidates[w].sig = sig;  }  return (sig < best_sig);} /* save candidates */

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?