⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 core_algorithms_multialpha.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 5 页
字号:
    t_res2 = t_res2 + *((hmmp->log_transitions) +			get_mtx_index(wp->vertex, hmmp->nr_v - 1, hmmp->nr_v));    #ifdef DEBUG_VI    printf("prev = %f: ", t_res1);    printf("trans = %f\n", t_res2);#endif    if(t_res1 != DEFAULT && t_res2 != DEFAULT) {      res = t_res1 + t_res2;    }    else {      res = DEFAULT;    }    if(prev == NO_PREV && res != DEFAULT) {      prev = from_vp->vertex;      prevp = from_vp;      max = res;      continue;    }    if(res > max && res != DEFAULT) {      prev = from_vp->vertex;      prevp = from_vp;      max = res;    }    wp++;  }#ifdef DEBUG_VI  printf("res = %f\n", max);#endif  if(max != DEFAULT) {    (cur_rowp + hmmp->nr_v-1)->prob = max;    (cur_rowp + hmmp->nr_v-1)->prev = prev;    (cur_rowp + hmmp->nr_v-1)->prevp = prevp;  }  else {#ifdef DEBUG_VI    dump_trans_matrix(hmmp->nr_v, hmmp->nr_v, hmmp->transitions);    dump_trans_matrix(hmmp->nr_v, hmmp->nr_v, hmmp->log_transitions);#endif        printf("Sequence cannot be produced by this hmm\n");     sequence_as_string(s);    return NOPROB;  }#ifdef PATH  //dump_viterbi_matrix(seq_len + 2, hmmp->nr_v, viterbi_mtxp);  //dump_viterbi_path((cur_rowp + w)->prevp);  printf("normalized log likelihood for most probable path = %f\n",	 0.0 - (((cur_rowp + hmmp->nr_v - 1)->prob) / seq_len));  printf("and most probable path is: ");  //dump_viterbi_path((cur_rowp + hmmp->nr_v - 1), hmmp, viterbi_mtxp, seq_len + 1, hmmp->nr_v);  printf("%d\n",hmmp->nr_v - 1);  printf("log prob = %f\n", (cur_rowp + hmmp->nr_v-1)->prob);  printf("real prob = %f\n", pow(10,(cur_rowp + hmmp->nr_v-1)->prob));  dump_viterbi_label_path((cur_rowp + hmmp->nr_v - 1), hmmp, viterbi_mtxp, seq_len + 1, hmmp->nr_v);  printf("\n");#endif   /* Garbage collection and return */  free(seq);  if(hmmp->nr_alphabets > 1) {    free(seq_2);  }  if(hmmp->nr_alphabets > 2) {    free(seq_3);  }  if(hmmp->nr_alphabets > 3) {    free(seq_4);  }  return OK;}/************************* the one_best algorithm **********************************/int one_best_multi(struct hmm_multi_s *hmmp, struct letter_s *s, struct letter_s *s_2, struct letter_s *s_3,		   struct letter_s *s_4, struct one_best_s **ret_one_best_mtxpp,		   double **ret_scale_fspp, int use_labels, char *best_labeling, int multi_scoring_method){  struct one_best_s *one_best_mtxp, *cur_rowp, *prev_rowp; /* pointers to forward matrix */  double *scale_fsp; /* pointer to array of scaling factors */  struct letter_s *seq, *seq_2, *seq_3, *seq_4; /* pointer to the sequence */  int *sorted_v_list; /* final list for the association of same-labeling-elements */  int v_list_index;  int i,j; /* loop variables */  int seq_len; /* length of the sequence */  int c, v, w; /* looping indices, c loops over the sequence,		  v and w over the vertices in the HMM */  double row_sum, res, t_res1, t_res2, t_res3; /* temporary variables to calculate probabilities */  double t_res3_1, t_res3_2, t_res3_3, t_res3_4;  int nr_v;  struct path_element *wp; /* for traversing the paths from v to w */  int a_index, a_index_2, a_index_3, a_index_4; /* holds the current letter's position in the alphabet */  int replacement_letter_c, replacement_letter_c_2, replacement_letter_c_3, replacement_letter_c_4;  double scaled_result;    /* Allocate memory for matrix and scaling factors + some initial setup:   * Note 1: one_best probability matrix has the sequence indices vertically   * and the vertex indices horizontally meaning it will be filled row by row   * Note 2: *ret_one_best_mtxpp and *ret_scale_fspp are allocated here, but must be   * freed by caller */  nr_v = hmmp->nr_v;  seq_len = get_seq_length(s);  *ret_one_best_mtxpp = (struct one_best_s*)(malloc_or_die((seq_len+2) *						       hmmp->nr_v *						       sizeof(struct one_best_s)));  one_best_mtxp = *ret_one_best_mtxpp;  *ret_scale_fspp = (double*)(malloc_or_die((seq_len+2) * sizeof(double)));  scale_fsp = *ret_scale_fspp;  sorted_v_list = (int*)(malloc_or_die((hmmp->nr_v * 2 + 1) * sizeof(int)));  /* Convert sequence to 1...L for easier indexing */  seq = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s)));  memcpy(seq+1, s, seq_len * sizeof(struct letter_s));  if(hmmp->nr_alphabets > 1) {    seq_2 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s)));    memcpy(seq_2+1, s_2, seq_len * sizeof(struct letter_s));  }  if(hmmp->nr_alphabets > 2) {    seq_3 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s)));    memcpy(seq_3+1, s_3, seq_len * sizeof(struct letter_s));  }  if(hmmp->nr_alphabets > 3) {    seq_4 = (struct letter_s*) malloc_or_die(((seq_len + 2) * sizeof(struct letter_s)));    memcpy(seq_4+1, s_4, seq_len * sizeof(struct letter_s));  }    /* Initialize the first row of the matrix */  one_best_mtxp->prob = 1.0; /* sets index (0,0) to 1.0,				the rest are already 0.0 as they should be */   one_best_mtxp->labeling = (char*)(malloc_or_die(1 * sizeof(char)));  for(i = 1; i < hmmp->nr_v; i++) {    (one_best_mtxp+i)->labeling = NULL;  }  *scale_fsp = 1.0;    /* create initial sorted v-list*/  *(sorted_v_list) = 0; /* 0 is always the number of the start state */  *(sorted_v_list + 1) = V_LIST_NEXT;  *(sorted_v_list + 2) = V_LIST_END;    /* Fill in middle rows */  prev_rowp = one_best_mtxp;  cur_rowp = one_best_mtxp + hmmp->nr_v;  for(c = 1; c <= seq_len; c++) {        /* get alphabet index for c*/    if(hmmp->alphabet_type == DISCRETE) {      replacement_letter_c = NO;      a_index = get_alphabet_index(&seq[c], hmmp->alphabet, hmmp->a_size);      if(a_index < 0) {	a_index = get_replacement_letter_index_multi(&seq[c], hmmp->replacement_letters, 1);	if(a_index < 0) {	  printf("Letter '%s' is not in alphabet\n", (&seq[c])->letter);	  return NOPROB;	}	replacement_letter_c = YES;      }    }    if(hmmp->nr_alphabets > 1) {      if(hmmp->alphabet_type_2 == DISCRETE) {	replacement_letter_c_2 = NO;	a_index_2 = get_alphabet_index(&seq_2[c], hmmp->alphabet_2, hmmp->a_size_2);	if(a_index_2 < 0) {	  a_index_2 = get_replacement_letter_index_multi(&seq_2[c], hmmp->replacement_letters, 2);	  if(a_index_2 < 0) {	    printf("Letter '%s' is not in alphabet\n", (&seq_2[c])->letter);	    return NOPROB;	  }	  replacement_letter_c_2 = YES;	}      }    }    if(hmmp->nr_alphabets > 2) {      if(hmmp->alphabet_type_3 == DISCRETE) {	replacement_letter_c_3 = NO;	a_index_3 = get_alphabet_index(&seq_3[c], hmmp->alphabet_3, hmmp->a_size_3);	if(a_index_3 < 0) {	  a_index_3 = get_replacement_letter_index_multi(&seq_3[c], hmmp->replacement_letters, 3);	  if(a_index_3 < 0) {	    printf("Letter '%s' is not in alphabet\n", (&seq_3[c])->letter);	    return NOPROB;	  }	  replacement_letter_c_3 = YES;	}      }    }    if(hmmp->nr_alphabets > 3) {      if(hmmp->alphabet_type_4 == DISCRETE) {	replacement_letter_c_4 = NO;	a_index_4 = get_alphabet_index(&seq_4[c], hmmp->alphabet_4, hmmp->a_size_4);	if(a_index_4 < 0) {	  a_index_4 = get_replacement_letter_index_multi(&seq_4[c], hmmp->replacement_letters, 4);	  if(a_index_4 < 0) {	    printf("Letter '%s' is not in alphabet\n", (&seq_4[c])->letter);	    return NOPROB;	  }	  replacement_letter_c_4 = YES;	}      }    }    #ifdef DEBUG_FW      printf("seq[c] = %s\n", &seq[c]);    printf("a_index = %d\n", a_index);    printf("v_list dump out ");    dump_v_list(sorted_v_list);   #endif    /* calculate probabilities */    row_sum = 0.0;    for(v = 0; v < hmmp->nr_v - 1; v++) /* v = to-vertex */ {#ifdef DEBUG_FW      printf("prob to vertex %d\n", v);#endif      v_list_index = 0;      (cur_rowp + v)->labeling = NULL;            while(*(sorted_v_list + v_list_index) != V_LIST_END) {	res = 0.0;	while(*(sorted_v_list + v_list_index) != V_LIST_NEXT) {	  w = *(sorted_v_list + v_list_index); /* w = from-vertex */	  /* calculate intermediate results */	  res += (prev_rowp + w)->prob * *(hmmp->tot_transitions + (w * nr_v + v));	  if(*(hmmp->tot_transitions + (w * nr_v + v)) < 0) {	    printf("Error: found model transition prob from %d to %d < 0.0\n", w, v);	    exit(0);	  }#ifdef DEBUG_FW	  printf("prev = %f: ", (prev_rowp + w)->prob);	  printf("trans = %f\n", *(hmmp->tot_transitions + (w * nr_v + v)));#endif	  v_list_index++;	}		if(res == 0.0) {	  v_list_index++;	  continue;	}	/* calculate prob of producing letter l in v */	t_res3 = get_single_emission_score_multi(hmmp, seq, seq_2, seq_3, seq_4, c, v, replacement_letter_c, replacement_letter_c_2,						 replacement_letter_c_3, replacement_letter_c_4, use_labels, a_index,						 a_index_2, a_index_3, a_index_4, multi_scoring_method);			res = res * t_res3;			/* check if this score is best so far */#ifdef DEBUG_FW	printf("best score = %f\n",(cur_rowp + v)->prob);#endif	if(res > (cur_rowp + v)->prob) {#ifdef DEBUG_FW	  	  printf("updating best score to %f\n", res);  #endif	  /* save result in matrices */	  (cur_rowp + v)->prob = res;	  /* set pointer to point to current labeling */	  (cur_rowp + v)->labeling = (prev_rowp + w)->labeling;	  if((cur_rowp + v)->labeling == NULL) {	    printf("Error: NULL labeling when updating best score\n");	    exit(0);	  }	}	#ifdef DEBUG_FW	printf("res = %f\n", res);#endif	v_list_index++;      }    }        /* update labeling pointers */    update_labelings(cur_rowp, hmmp->vertex_labels, sorted_v_list, seq_len, c, hmmp->labels, hmmp->nr_labels, hmmp->nr_v);    deallocate_row_labelings(prev_rowp, hmmp->nr_v);        /* scale the results, row_sum = the total probability of      * having produced the labelings up to and including character c */    row_sum = 0.0;    for(v = 0; v < hmmp->nr_v; v++) {      row_sum = row_sum + (cur_rowp + v)->prob;#ifdef DEBUG_FW      dump_labeling((cur_rowp + v)->labeling, c);      printf("c = %d\n", c);#endif    }    scale_fsp[c] = row_sum;#ifdef DEBUG_FW    printf("rowsum = %f\n",row_sum);    printf("scaling set to: %f\n", scale_fsp[c]);#endif    if(row_sum == 0.0) {      printf("Sequence cannot be produced by this hmm\n");       sequence_as_string(s);      exit(0);      return NOPROB;    }    for(v = 0; v < hmmp->nr_v; v++) {      if((cur_rowp + v)->prob != 0.0){	(cur_rowp + v)->prob = ((cur_rowp + v)->prob)/row_sum; /* scaling */      }    }        /* move row pointers one row forward */    prev_rowp = cur_rowp;    cur_rowp = cur_rowp + hmmp->nr_v;  }  /* Fill in transition to end state */  v_list_index = 0;  (cur_rowp + hmmp->nr_v - 1)->labeling = NULL;  while(*(sorted_v_list + v_list_index) != V_LIST_END) {    res = 0.0;    while(*(sorted_v_list + v_list_index) != V_LIST_NEXT) {      w = *(sorted_v_list + v_list_index); /* w = from-vertex */      t_res1 = (prev_rowp + w)->prob;      t_res2 = *((hmmp->tot_transitions) + get_mtx_index(w, hmmp->nr_v-1, hmmp->nr_v));      if(t_res2 > 1.0) {	t_res2 = 1.0;      }      res += t_res1 * t_res2;      v_list_index++;    }        /* check if this score is best so far */    if(res > (cur_rowp + hmmp->nr_v - 1)->prob) {      /* save result in matrices */      (cur_rowp + hmmp->nr_v - 1)->prob = res;      /* set pointer to point to current labeling */      (cur_rowp + hmmp->nr_v - 1)->labeling = (prev_rowp + w)->labeling;    }        v_list_index++;  }#ifdef DEBUG_FW  dump_one_best_matrix(seq_len + 2, hmmp->nr_v, one_best_mtxp);  dump_scaling_array(seq_len + 1, scale_fsp);#endif    /* store results */  scaled_result = (cur_rowp + hmmp->nr_v - 1)->prob;  memcpy(best_labeling, ((cur_rowp + hmmp->nr_v - 1)->labeling) + 1, seq_len * sizeof(char));  best_labeling[seq_len] = '\0';#ifdef DEBUG_PATH  printf("seq_len = %d\n", seq_len);  printf("best labeling = %s\n", best_labeling);#endif    /* Garbage collection and return */  free(seq);  if(hmmp->nr_alphabets > 1) {    free(seq_2);  }  if(hmmp->nr_alphabets > 2) {    free(seq_3);  }  if(hmmp->nr_alphabets > 3) {    free(seq_4);  }  free(sorted_v_list);  /* FREE labelings, except result labeling */  deallocate_row_labelings(prev_rowp, hmmp->nr_v);    return OK;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -