⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 core_algorithms_multialpha.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 5 页
字号:
#include <stdio.h>#include <math.h>#include <stdlib.h>#include <string.h>#include "structs.h"#include "funcs.h"#define V_LIST_END  -99#define V_LIST_NEXT  -9//#define DEBUG_LABELING_UPDATE//#define DEBUG_DEALLOCATE_LABELINGS//#define DEBUG_FW//#define DEBUG_BW//#define DEBUG_VI//#define PATHextern int verbose;double dot_product_multi(struct hmm_multi_s *hmmp, int use_prior_shares, int use_gap_shares,			 struct msa_sequences_multi_s *msa_seq_infop, int c, int v, int normalize, int multi_scoring_method);double dot_prouct_picasso_multi(struct hmm_multi_s *hmmp, int use_prior_shares, int use_gap_shares,				struct msa_sequences_multi_s *msa_seq_infop, int c, int v, int normalize, int multi_scoring_method,				double *aa_freqs, double *aa_freqs_2, double *aa_freqs_3, double *aa_freqs_4);double picasso_multi(struct hmm_multi_s *hmmp, int use_prior_shares, int use_gap_shares,			 struct msa_sequences_multi_s *msa_seq_infop, int c, int v, int normalize, int multi_scoring_method,			 double *aa_freqs, double *aa_freqs_2, double *aa_freqs_3, double *aa_freqs_4);double picasso_sym_multi(struct hmm_multi_s *hmmp, int use_prior_shares, int use_gap_shares,			 struct msa_sequences_multi_s *msa_seq_infop, int c, int v, int normalize, int multi_scoring_method,			 double *aa_freqs, double *aa_freqs_2, double *aa_freqs_3, double *aa_freqs_4);double sjolander_score_multi(struct hmm_multi_s *hmmp, int use_prior_shares, int use_gap_shares,			     struct msa_sequences_multi_s *msa_seq_infop, int c, int v, int normalize, int multi_scoring_method);double sjolander_reversed_score_multi(struct hmm_multi_s *hmmp, int use_prior_shares, int use_gap_shares,				      struct msa_sequences_multi_s *msa_seq_infop, int c, int v, int normalize,				      int multi_scoring_method);double subst_mtx_product_multi(struct hmm_multi_s *hmmp, int use_prior_shares, int use_gap_shares,			       struct msa_sequences_multi_s *msa_seq_infop, int c, int v, int normalize, int multi_scoring_method);double subst_mtx_dot_product_multi(struct hmm_multi_s *hmmp, int use_prior_shares, int use_gap_shares,				   struct msa_sequences_multi_s *msa_seq_infop, int c, int v, int a_index, int a_index_2,				   int a_index_3, int a_index_4,				   int normalize, int multi_scoring_method);double subst_mtx_dot_product_prior_multi(struct hmm_multi_s *hmmp, int use_prior_shares, int use_gap_shares,					 struct msa_sequences_multi_s *msa_seq_infop, int c, int v, int a_index, int a_index_2,					 int a_index_3, int a_index_4,					 int normalize, int multi_scoring_method);double get_msa_emission_score_multi(struct msa_sequences_multi_s *msa_seq_infop, struct hmm_multi_s *hmmp, int c, int v,				    int use_labels, int normalize, int a_index, int a_index_2,				    int a_index_3, int a_index_4, int scoring_method, int use_gap_shares,				    int use_prior_shares, int multi_scoring_method, double *aa_freqs, double *aa_freqs_2,				    double *aa_freqs_3, double *aa_freqs_4);double get_single_emission_score_multi(struct hmm_multi_s *hmmp, struct letter_s *seq, struct letter_s *seq_2,				       struct letter_s *seq_3, struct letter_s *seq_4, int c, int v, int replacement_letter_c,				       int replacement_letter_c_2, int replacement_letter_c_3,				       int replacement_letter_c_4, int use_labels, int a_index, int a_index_2,				       int a_index_3, int a_index_4, int multi_scoring_method);/************************* the forward algorithm **********************************/int forward_multi(struct hmm_multi_s *hmmp, struct letter_s *s, struct letter_s *s_2, struct letter_s *s_3,		  struct letter_s *s_4, struct forward_s **ret_forw_mtxpp,		  double **ret_scale_fspp, int use_labels, int multi_scoring_method){  struct forward_s *forw_mtxp, *cur_rowp, *prev_rowp; /* pointers to forward matrix */  double *scale_fsp; /* pointer to array of scaling factors */  struct letter_s *seq, *seq_2, *seq_3, *seq_4; /* pointer to the sequence */  int i,j; /* loop variables */  int seq_len; /* length of the sequence */  int c, v, w; /* looping indices, c loops over the sequence,		  v and w over the vertices in the HMM */  double row_sum, res, t_res1, t_res2, t_res3; /* temporary variables to calculate probabilities */  double t_res3_1, t_res3_2, t_res3_3, t_res3_4;  int nr_v;  struct path_element *wp; /* for traversing the paths from v to w */  int a_index, a_index_2, a_index_3, a_index_4; /* holds the current letter's position in the alphabet */  int replacement_letter_c, replacement_letter_c_2, replacement_letter_c_3, replacement_letter_c_4;  #ifdef DEBUG_FW  printf("running forward\n");#endif  /* Allocate memory for matrix and scaling factors + some initial setup:   * Note 1: forward probability matrix has the sequence indices vertically   * and the vertex indices horizontally meaning it will be filled row by row   * Note 2: *ret_forw_mtxpp and *ret_scale_fspp are allocated here, but must be   * freed by caller */  nr_v = hmmp->nr_v;  seq_len = get_seq_length(s);  *ret_forw_mtxpp = (struct forward_s*)(malloc_or_die((seq_len+2) *						      hmmp->nr_v *						      sizeof(struct forward_s)));  forw_mtxp = *ret_forw_mtxpp;  *ret_scale_fspp = (double*)(malloc_or_die((seq_len+2) * sizeof(double)));  scale_fsp = *ret_scale_fspp;    /* Convert sequence to 1...L for easier indexing */  seq = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s)));  memcpy(seq+1, s, seq_len * sizeof(struct letter_s));  if(hmmp->nr_alphabets > 1) {    seq_2 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s)));    memcpy(seq_2+1, s_2, seq_len * sizeof(struct letter_s));  }  if(hmmp->nr_alphabets > 2) {    seq_3 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s)));    memcpy(seq_3+1, s_3, seq_len * sizeof(struct letter_s));  }  if(hmmp->nr_alphabets > 3) {    seq_4 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s)));    memcpy(seq_4+1, s_4, seq_len * sizeof(struct letter_s));  }  /* Initialize the first row of the matrix */  forw_mtxp->prob = 1.0; /* sets index (0,0) to 1.0,			    the rest are already 0.0 as they should be */  *scale_fsp = 1.0;  /* Fill in middle rows */  prev_rowp = forw_mtxp;  cur_rowp = forw_mtxp + hmmp->nr_v;  for(c = 1; c <= seq_len; c++) {        /* get alphabet index for c*/    if(hmmp->alphabet_type == DISCRETE) {      replacement_letter_c = NO;      a_index = get_alphabet_index(&seq[c], hmmp->alphabet, hmmp->a_size);      if(a_index < 0) {	a_index = get_replacement_letter_index_multi(&seq[c], hmmp->replacement_letters, 1);	if(a_index < 0) {	  printf("Letter '%s' is not in alphabet\n", (&seq[c])->letter);	  return NOPROB;	}	replacement_letter_c = YES;      }    }    if(hmmp->nr_alphabets > 1) {      if(hmmp->alphabet_type_2 == DISCRETE) {	replacement_letter_c_2 = NO;	a_index_2 = get_alphabet_index(&seq_2[c], hmmp->alphabet_2, hmmp->a_size_2);	if(a_index_2 < 0) {	  a_index_2 = get_replacement_letter_index_multi(&seq_2[c], hmmp->replacement_letters, 2);	  if(a_index_2 < 0) {	    printf("Letter '%s' is not in alphabet\n", (&seq_2[c])->letter);	    return NOPROB;	  }	  replacement_letter_c_2 = YES;	}      }    }    if(hmmp->nr_alphabets > 2) {      if(hmmp->alphabet_type_3 == DISCRETE) {	replacement_letter_c_3 = NO;	a_index_3 = get_alphabet_index(&seq_3[c], hmmp->alphabet_3, hmmp->a_size_3);	if(a_index_3 < 0) {	  a_index_3 = get_replacement_letter_index_multi(&seq_3[c], hmmp->replacement_letters, 3);	  if(a_index_3 < 0) {	    printf("Letter '%s' is not in alphabet\n", (&seq_3[c])->letter);	    return NOPROB;	  }	  replacement_letter_c_3 = YES;	}      }    }    if(hmmp->nr_alphabets > 3) {      if(hmmp->alphabet_type_4 == DISCRETE) {	replacement_letter_c_4 = NO;	a_index_4 = get_alphabet_index(&seq_4[c], hmmp->alphabet_4, hmmp->a_size_4);	if(a_index_4 < 0) {	  a_index_4 = get_replacement_letter_index_multi(&seq_4[c], hmmp->replacement_letters, 4);	  if(a_index_4 < 0) {	    printf("Letter '%s' is not in alphabet\n", (&seq_4[c])->letter);	    return NOPROB;	  }	  replacement_letter_c_4 = YES;	}      }    }#ifdef DEBUG_FW      printf("seq[c] = %s\n", &seq[c]);    if(hmmp->alphabet_type == DISCRETE) {      printf("a_index = %d\n", a_index);    }#endif    /* calculate sum of probabilities */    row_sum = 0;    for(v = 1; v < hmmp->nr_v - 1; v++) /* v = to-vertex */ {#ifdef DEBUG_FW      printf("prob to vertex %d\n", v);#endif      res = 0.0;      wp = *(hmmp->tot_from_trans_array + v);      while((w = wp->vertex) != END) /* w = from-vertex */ {	/* calculate intermediate results */	res += (prev_rowp + w)->prob * *(hmmp->tot_transitions + (w * nr_v + v));	if(*(hmmp->tot_transitions + (w * nr_v + v)) < 0) {	  printf("found model transition prob from %d to %d < 0.0\n", w, v);	  exit(0);	}	wp++;#ifdef DEBUG_FW	printf("prev = %f: ", (prev_rowp + w)->prob);	printf("trans = %f\n",  *(hmmp->tot_transitions + (w * nr_v + v)));#endif      }                 /* calculate prob of producing letter l in v */      t_res3 = get_single_emission_score_multi(hmmp, seq, seq_2, seq_3, seq_4, c, v, replacement_letter_c, replacement_letter_c_2,					       replacement_letter_c_3, replacement_letter_c_4, use_labels, a_index,					       a_index_2, a_index_3, a_index_4, multi_scoring_method);            res = res * t_res3;      row_sum += res;	      /* save result in matrices */      (cur_rowp + v)->prob = res;            #ifdef DEBUG_FW      printf("res = %f\n", res);#endif    }        /* scale the results, row_sum = the total probability of      * having produced the sequence up to and including character c */    scale_fsp[c] = row_sum;#ifdef DEBUG_FW    printf("rowsum = %f\n",row_sum);    printf("scaling set to: %f\n", scale_fsp[c]);#endif    if(row_sum == 0.0) {      printf("Sequence cannot be produced by this hmm\n");      sequence_as_string(s);      printf("pos = %d\n",c);      return NOPROB;    }    for(v = 0; v < hmmp->nr_v; v++) {      if((cur_rowp + v)->prob != 0){	(cur_rowp + v)->prob = ((cur_rowp + v)->prob)/row_sum; /* scaling */      }    }        /* move row pointers one row forward */    prev_rowp = cur_rowp;    cur_rowp = cur_rowp + hmmp->nr_v;  }   /* Fill in transition to end state */  res = 0;  wp = *(hmmp->tot_from_trans_array + hmmp->nr_v - 1);  while(wp->vertex != END) {    t_res1 = (prev_rowp + wp->vertex)->prob;    t_res2 = *((hmmp->tot_transitions) + get_mtx_index(wp->vertex, hmmp->nr_v-1, hmmp->nr_v));    if(t_res2 > 1.0) {      t_res2 = 1.0;    }    res += t_res1 * t_res2;    wp++;  }#ifdef DEBUG_FW  printf("res = %f\n", res);#endif  (cur_rowp + hmmp->nr_v - 1)->prob = res; /* obs: no scaling performed here */#ifdef DEBUG_FW  dump_forward_matrix(seq_len + 2, hmmp->nr_v, forw_mtxp);  dump_scaling_array(seq_len + 1, scale_fsp);#endif    /* Garbage collection and return */  free(seq);  if(hmmp->nr_alphabets > 1) {    free(seq_2);  }  if(hmmp->nr_alphabets > 2) {    free(seq_3);  }  if(hmmp->nr_alphabets > 3) {    free(seq_4);  }  return OK;}/**************************** the backward algorithm **********************************/int backward_multi(struct hmm_multi_s *hmmp, struct letter_s *s, struct letter_s *s_2, struct letter_s *s_3,		   struct letter_s *s_4, struct backward_s **ret_backw_mtxpp,		   double *scale_fsp, int use_labels, int multi_scoring_method){  struct backward_s *backw_mtxp, *cur_rowp, *prev_rowp; /* pointers to backward matrix */  struct letter_s *seq, *seq_2, *seq_3, *seq_4; /* pointer to the sequence */  int i; /* loop index */  int seq_len; /* length of the sequence */  int c, v, w; /* looping indices, c loops over the sequence,		  v and w over the vertices in the HMM */  int a_index, a_index_2, a_index_3, a_index_4; /* holds the current letter's position in the alphabet */  double row_sum, res,t_res1,t_res2, t_res3; /* temporary variables to calculate results */  double t_res3_1, t_res3_2, t_res3_3, t_res3_4;  struct path_element *wp;  int replacement_letter_c, replacement_letter_c_2, replacement_letter_c_3, replacement_letter_c_4;    /* Allocate memory for matrix + some initial setup:   * Note 1: probability matrix has the sequence indices vertically   * and the vertex indices horizontally meaning it will be filled row by row   * Note 2: *ret_backw_mtxpp is allocated here, but must be   * freed by caller   * Note 3: *scale_fspp is the scaling array produced by forward() meaning backward()   * must be called after forward() */  seq_len = get_seq_length(s);  *ret_backw_mtxpp = (struct backward_s*)(malloc_or_die((seq_len+2) *							hmmp->nr_v * 							sizeof(struct backward_s)));  backw_mtxp = *ret_backw_mtxpp;    /* Convert sequence to 1...L for easier indexing */  seq = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s)));  memcpy(seq+1, s, seq_len * sizeof(struct letter_s));  if(hmmp->nr_alphabets > 1) {    seq_2 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s)));    memcpy(seq_2+1, s_2, seq_len * sizeof(struct letter_s));  }  if(hmmp->nr_alphabets > 2) {    seq_3 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s)));    memcpy(seq_3+1, s_3, seq_len * sizeof(struct letter_s));  }  if(hmmp->nr_alphabets > 3) {    seq_4 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s)));    memcpy(seq_4+1, s_4, seq_len * sizeof(struct letter_s));  }  /* Initialize the last row of the matrix */  (backw_mtxp + get_mtx_index(seq_len + 1, hmmp->nr_v - 1,			      hmmp->nr_v))->prob = 1.0; /* sets index							 * (seq_len+1,nr_v-1) i.e.							 * the lower right							 * corner of the matrix to 							 * 1.0,the rest are already							 * 0.0 as they should be*/    /* Fill in next to last row in matrix (i.e. add prob for path to end state for all states    * that have a transition path to the end state) */  prev_rowp = backw_mtxp + get_mtx_index(seq_len + 1 , 0, hmmp->nr_v);  cur_rowp = prev_rowp - hmmp->nr_v;  wp = *(hmmp->from_trans_array + hmmp->nr_v - 1);    while(wp->vertex != END) {    w = wp->vertex;    t_res1 = 1.0;    while(wp->next != NULL) {      t_res1 = t_res1 * *(hmmp->transitions +			  get_mtx_index(wp->vertex, (wp + 1)->vertex, hmmp->nr_v));      wp++;    }    (cur_rowp + w)->prob += t_res1;    if((cur_rowp + w)->prob > 1.0) {      (cur_rowp + w)->prob = 1.0;    }    wp++;  }  prev_rowp = cur_rowp;  cur_rowp = cur_rowp - hmmp->nr_v;    /* Fill in first rows moving upwards in matrix */  for(c = seq_len; c >= 1; c--) {        /* get alphabet index for c */#ifdef DEBUG_BW    printf("c = %d\n", c);#endif

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -