⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 scoretfbs.c

📁 一个通用的隐性马尔可夫C代码库 开发环境:C语言 简要说明:这是一个通用的隐性马尔可夫C代码库
💻 C
字号:
#include <stdio.h>#include <string.h>#include <stdlib.h>#include <math.h>#include <ghmm/ghmm.h>#include <ghmm/mes.h>#include <ghmm/rng.h>#include <ghmm/sequence.h>#include <ghmm/model.h>#include <ghmm/viterbi.h>#define MAXSEQ      5000#define ALPHASIZE   4double background[ALPHASIZE] = {0.25, 0.25, 0.25, 0.25};int SeqNumber;int    seq_len[MAXSEQ];int *seq_array[MAXSEQ];int *internalrep(char *seq_c, int len) {  int *int_seq = (int*) malloc(sizeof(int)*len);  int i, sym;    sym = 0;  for(i=0; i < len; i++) {    switch(seq_c[i]) {      case 'A': 	sym = 0;	break;      case 'C': 	sym = 1;	break;      case 'G': 	sym = 2;	break;      case 'T': 	sym = 3;	break;    }    int_seq[i] = sym;  }  return int_seq;}void readfasta(char *fn) {  char line[5000];  char seq_c[50000];  FILE *fp;  int n;  SeqNumber = 0;  fp = fopen(fn,"r");  seq_c[0] = '\0';  // get first line  fgets(line, 5000, fp);  printf("%s\n", line);   while( fgets(line, 5000, fp) != NULL ) {    if (line[0] != '>') {      n = strlen(line);      line[n-1]='\0';      strcat(seq_c, line);    } else {      seq_len[ SeqNumber]   = strlen(seq_c); // without terminating character      seq_array[ SeqNumber] = internalrep(seq_c, strlen(seq_c));      SeqNumber++;      // next header      seq_c[0] = '\0';    }  }  // last sequence    printf("%s\n", seq_c);  seq_len[ SeqNumber]   = strlen(seq_c); // without terminating character  seq_array[ SeqNumber] = internalrep(seq_c, strlen(seq_c));  SeqNumber++;  printf("\t readfasta, %d sequences \n", SeqNumber);  fclose(fp);}double **tfbs_counts(char *fn, int *len_tfbs) {  FILE *fp;  double temp[500][ALPHASIZE];  double **result;  int a[ALPHASIZE];  int i,j,len,bp;  double sum;  len = 0;  fp = fopen(fn,"r");  while(  fscanf(fp, "%d %d %d %d", a, a+1, a+2, a+3) != EOF ) {    sum = 0.0;    for(i=0; i < ALPHASIZE; i++) {      if (a[i] == 0) {	temp[len][i] = 1.0;      }      else {	temp[len][i] = (double)a[i];      }      sum += temp[len][i];    }    for(i=0; i < ALPHASIZE; i++) {      temp[len][i] /= sum;    }    len++;  }    result= (double**) malloc(sizeof(double*) * len);  for(i=0; i < len; i++) {    result[i] = (double*) malloc(sizeof(double) * ALPHASIZE);    for(bp=0; bp < ALPHASIZE; bp++) {      result[i][bp] = temp[i][bp];    }  }  *len_tfbs = len;  fclose(fp);  return result;}void fill_state(model *dmo, int id, int next, int parent, double inprob, double *emission) {  int m;  dmo->s[id].out_states = 1; // to itself, first tf   dmo->s[id].in_states  = 1; // from itself, last tf   m_calloc(dmo->s[id].out_id, 1);  m_calloc(dmo->s[id].out_a, 1);  m_calloc(dmo->s[id].in_id, 1);  m_calloc(dmo->s[id].in_a, 1);  m_calloc(dmo->s[id].b   , dmo->M);  for (m = 0; m < dmo->M; m++) {    dmo->s[id].b[m] = emission[m];    printf("%2.2g  ", emission[m]);  }  printf("\n");  dmo->s[id].pi = 0.0;  dmo->s[id].out_states = 1;  dmo->s[id].in_states  = 1;  dmo->s[id].out_id[0] = next;  dmo->s[id].out_a [0] = 1.0;  dmo->s[id].in_id[0] = parent;  dmo->s[id].in_a [0] = inprob;}model *mk_tfbs(char *fn, double pb, double *backdist) {  double **emission;  int len_tfbs;  int nstates;  int i,m;  model *dmo;  emission = tfbs_counts(fn, &len_tfbs);  printf("mk_tfbs, length = %d \n", len_tfbs);  nstates = len_tfbs + 1;  // create the linear model  dmo = (model*) malloc(sizeof(model));  dmo->N = nstates;  dmo->M = ALPHASIZE;  dmo->s = (state*) malloc(sizeof(state) * nstates);  dmo->prior = -1;    // background  dmo->s[0].out_states = 2; // to itself, first tf   dmo->s[0].in_states  = 2; // from itself, last tf   m_calloc(dmo->s[0].out_id, 2);  m_calloc(dmo->s[0].out_a, 2);  m_calloc(dmo->s[0].in_id, 2);  m_calloc(dmo->s[0].in_a, 2);  m_calloc(dmo->s[0].b, dmo->M);  for (m = 0; m < dmo->M; m++)    dmo->s[0].b[m] = backdist[m];  dmo->s[0].pi = 1.0;  dmo->s[0].out_states = 2;  dmo->s[0].in_states  = 2;  dmo->s[0].out_id[0] = 0;  dmo->s[0].out_a [0] = pb;  dmo->s[0].out_id[1] = 1; // to first tf-site  dmo->s[0].out_a [1] = 1.0 - pb;  dmo->s[0].in_id[0] = 0;  dmo->s[0].in_a [0] = pb;  dmo->s[0].in_id[1] = nstates-1; // last tf-site  dmo->s[0].in_a [1] = 1.0;  i = 1;  fill_state(dmo, i, i+1, i-1, 1.0-pb, emission[i-1]); // first tf-site  for(i = 2; i < nstates-1; i++) {    fill_state(dmo, i, i+1, i-1, 1.0, emission[i-1]);  }  fill_state(dmo, i, 0, i-1, 1.0, emission[i-1]); // last tf-site  // No silent states  dmo->model_type = kSilentStates;  dmo->silent     = (int *) malloc(sizeof(int) * nstates);  for(i = 0; i < nstates; i++)     dmo->silent[i] = 0;  dmo->topo_order = NULL;  dmo->tied_to    = NULL;  for(i=0; i < len_tfbs; i++)    free(emission[i]);  free(emission);  return dmo;}void find_tfbs(model *dmo) {  int *int_path;  int n,k;  double log_p;  /****       int SeqNumber;       int    seq_len[MAXSEQ];       int *seq_array[MAXSEQ];  ********/  for(n = 0; n < 100; n++) {    int_path = viterbi(dmo, seq_array[n], seq_len[n], &log_p);    k = (seq_len[n]*dmo->N) - 2;    /****    while (int_path[k] != -1 ) {      printf("%d, ", int_path[k--]);      }****/    printf("Seq %2d: length = %d, log p = %.2g \n", n, seq_len[n], log_p);    free(int_path);  }}void cleanmem() {  int i;  for(i=0; i < SeqNumber; i++)    free(seq_array[i]);}int tfnames[5] = {1, 12, 10, 0, 20};int main(int argc, char *argv[]) {  int i;  model *dmo;  char names[100];  char path[100];  gsl_rng_init();  readfasta(argv[1]);  for(i = 0; i < 5; i++) {    strcpy( path, argv[2] );    sprintf(names, "%d%s", tfnames[i], ".TF");    printf("%s\n", strcat(path, names));    dmo = mk_tfbs(path, 0.9, background);    //model_A_print(stdout, dmo, "\t", ",", "\n");    //model_B_print(stdout, dmo, "\t", ",", "\n");        find_tfbs(dmo);    model_free(&dmo);  }  cleanmem();  return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -