📄 scoretfbs.c
字号:
#include <stdio.h>#include <string.h>#include <stdlib.h>#include <math.h>#include <ghmm/ghmm.h>#include <ghmm/mes.h>#include <ghmm/rng.h>#include <ghmm/sequence.h>#include <ghmm/model.h>#include <ghmm/viterbi.h>#define MAXSEQ 5000#define ALPHASIZE 4double background[ALPHASIZE] = {0.25, 0.25, 0.25, 0.25};int SeqNumber;int seq_len[MAXSEQ];int *seq_array[MAXSEQ];int *internalrep(char *seq_c, int len) { int *int_seq = (int*) malloc(sizeof(int)*len); int i, sym; sym = 0; for(i=0; i < len; i++) { switch(seq_c[i]) { case 'A': sym = 0; break; case 'C': sym = 1; break; case 'G': sym = 2; break; case 'T': sym = 3; break; } int_seq[i] = sym; } return int_seq;}void readfasta(char *fn) { char line[5000]; char seq_c[50000]; FILE *fp; int n; SeqNumber = 0; fp = fopen(fn,"r"); seq_c[0] = '\0'; // get first line fgets(line, 5000, fp); printf("%s\n", line); while( fgets(line, 5000, fp) != NULL ) { if (line[0] != '>') { n = strlen(line); line[n-1]='\0'; strcat(seq_c, line); } else { seq_len[ SeqNumber] = strlen(seq_c); // without terminating character seq_array[ SeqNumber] = internalrep(seq_c, strlen(seq_c)); SeqNumber++; // next header seq_c[0] = '\0'; } } // last sequence printf("%s\n", seq_c); seq_len[ SeqNumber] = strlen(seq_c); // without terminating character seq_array[ SeqNumber] = internalrep(seq_c, strlen(seq_c)); SeqNumber++; printf("\t readfasta, %d sequences \n", SeqNumber); fclose(fp);}double **tfbs_counts(char *fn, int *len_tfbs) { FILE *fp; double temp[500][ALPHASIZE]; double **result; int a[ALPHASIZE]; int i,j,len,bp; double sum; len = 0; fp = fopen(fn,"r"); while( fscanf(fp, "%d %d %d %d", a, a+1, a+2, a+3) != EOF ) { sum = 0.0; for(i=0; i < ALPHASIZE; i++) { if (a[i] == 0) { temp[len][i] = 1.0; } else { temp[len][i] = (double)a[i]; } sum += temp[len][i]; } for(i=0; i < ALPHASIZE; i++) { temp[len][i] /= sum; } len++; } result= (double**) malloc(sizeof(double*) * len); for(i=0; i < len; i++) { result[i] = (double*) malloc(sizeof(double) * ALPHASIZE); for(bp=0; bp < ALPHASIZE; bp++) { result[i][bp] = temp[i][bp]; } } *len_tfbs = len; fclose(fp); return result;}void fill_state(model *dmo, int id, int next, int parent, double inprob, double *emission) { int m; dmo->s[id].out_states = 1; // to itself, first tf dmo->s[id].in_states = 1; // from itself, last tf m_calloc(dmo->s[id].out_id, 1); m_calloc(dmo->s[id].out_a, 1); m_calloc(dmo->s[id].in_id, 1); m_calloc(dmo->s[id].in_a, 1); m_calloc(dmo->s[id].b , dmo->M); for (m = 0; m < dmo->M; m++) { dmo->s[id].b[m] = emission[m]; printf("%2.2g ", emission[m]); } printf("\n"); dmo->s[id].pi = 0.0; dmo->s[id].out_states = 1; dmo->s[id].in_states = 1; dmo->s[id].out_id[0] = next; dmo->s[id].out_a [0] = 1.0; dmo->s[id].in_id[0] = parent; dmo->s[id].in_a [0] = inprob;}model *mk_tfbs(char *fn, double pb, double *backdist) { double **emission; int len_tfbs; int nstates; int i,m; model *dmo; emission = tfbs_counts(fn, &len_tfbs); printf("mk_tfbs, length = %d \n", len_tfbs); nstates = len_tfbs + 1; // create the linear model dmo = (model*) malloc(sizeof(model)); dmo->N = nstates; dmo->M = ALPHASIZE; dmo->s = (state*) malloc(sizeof(state) * nstates); dmo->prior = -1; // background dmo->s[0].out_states = 2; // to itself, first tf dmo->s[0].in_states = 2; // from itself, last tf m_calloc(dmo->s[0].out_id, 2); m_calloc(dmo->s[0].out_a, 2); m_calloc(dmo->s[0].in_id, 2); m_calloc(dmo->s[0].in_a, 2); m_calloc(dmo->s[0].b, dmo->M); for (m = 0; m < dmo->M; m++) dmo->s[0].b[m] = backdist[m]; dmo->s[0].pi = 1.0; dmo->s[0].out_states = 2; dmo->s[0].in_states = 2; dmo->s[0].out_id[0] = 0; dmo->s[0].out_a [0] = pb; dmo->s[0].out_id[1] = 1; // to first tf-site dmo->s[0].out_a [1] = 1.0 - pb; dmo->s[0].in_id[0] = 0; dmo->s[0].in_a [0] = pb; dmo->s[0].in_id[1] = nstates-1; // last tf-site dmo->s[0].in_a [1] = 1.0; i = 1; fill_state(dmo, i, i+1, i-1, 1.0-pb, emission[i-1]); // first tf-site for(i = 2; i < nstates-1; i++) { fill_state(dmo, i, i+1, i-1, 1.0, emission[i-1]); } fill_state(dmo, i, 0, i-1, 1.0, emission[i-1]); // last tf-site // No silent states dmo->model_type = kSilentStates; dmo->silent = (int *) malloc(sizeof(int) * nstates); for(i = 0; i < nstates; i++) dmo->silent[i] = 0; dmo->topo_order = NULL; dmo->tied_to = NULL; for(i=0; i < len_tfbs; i++) free(emission[i]); free(emission); return dmo;}void find_tfbs(model *dmo) { int *int_path; int n,k; double log_p; /**** int SeqNumber; int seq_len[MAXSEQ]; int *seq_array[MAXSEQ]; ********/ for(n = 0; n < 100; n++) { int_path = viterbi(dmo, seq_array[n], seq_len[n], &log_p); k = (seq_len[n]*dmo->N) - 2; /**** while (int_path[k] != -1 ) { printf("%d, ", int_path[k--]); }****/ printf("Seq %2d: length = %d, log p = %.2g \n", n, seq_len[n], log_p); free(int_path); }}void cleanmem() { int i; for(i=0; i < SeqNumber; i++) free(seq_array[i]);}int tfnames[5] = {1, 12, 10, 0, 20};int main(int argc, char *argv[]) { int i; model *dmo; char names[100]; char path[100]; gsl_rng_init(); readfasta(argv[1]); for(i = 0; i < 5; i++) { strcpy( path, argv[2] ); sprintf(names, "%d%s", tfnames[i], ".TF"); printf("%s\n", strcat(path, names)); dmo = mk_tfbs(path, 0.9, background); //model_A_print(stdout, dmo, "\t", ",", "\n"); //model_B_print(stdout, dmo, "\t", ",", "\n"); find_tfbs(dmo); model_free(&dmo); } cleanmem(); return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -