⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 set_profile_hmm_parameters.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 2 页
字号:
	  }	  else if(last_sign == INSERT_ACID) {	    cur_sign = INSERT_ACID;	  }	  else if(last_sign == GAP) {	    cur_sign = GAP;	  }	  is_gap = YES;	  cur_msa_column++;	  	}      }      else if(cur == ';') {	      }      else /* ACID */{	last_sign = cur_sign;	if(cur_msa_column == cur_mtx_col_nr) /* match */{	  if(last_sign == ACID || last_sign == NEW) {	    cur_sign = ACID;	    *(transitions + get_mtx_index(1,cur_mtx_column,nr_mtx_columns)) += 1;	  }	  else if(last_sign == GAP) {	    cur_sign = ACID;	    *(transitions + get_mtx_index(5,cur_mtx_column,nr_mtx_columns)) += 1;	  }	  else if(last_sign == INSERT_ACID) {	    cur_sign = ACID;	    *(transitions + get_mtx_index(7,cur_mtx_column,nr_mtx_columns)) += 1;	  }	  if(is_gap == YES) {	    is_gap = NO;	    cur_insert_column++;	  }	  cur_msa_column++;	  cur_mtx_column++;	}	else if(cur_msa_column < cur_mtx_col_nr) /* insert */{	  if(last_sign == ACID || last_sign == NEW) {	    cur_sign = INSERT_ACID;	    *(transitions + get_mtx_index(3,cur_mtx_column,nr_mtx_columns)) += 1;	  }	  else if(last_sign == INSERT_ACID) {	    cur_sign = INSERT_ACID;	    *(transitions + get_mtx_index(6,cur_mtx_column,nr_mtx_columns)) += 1;	  }	  else if(last_sign == GAP) {	    cur_sign = INSERT_ACID;	  }	  is_gap = YES;	  cur_msa_column++;	}      }    }  }  //printf(check 6\n");  /* check that msa length and nr columns match */  if((nr_mtx_columns - 1)*3 + 4 != hmm.nr_v) {    printf("Warning: length of hmm is not equal to length of profile\n");  }  else {    /* start-transitions */    s_tot = *(transitions + get_mtx_index(1,0,nr_mtx_columns)) + *(transitions + get_mtx_index(2,0,nr_mtx_columns));    sd = *(transitions + get_mtx_index(2,0,nr_mtx_columns));    sm = *(transitions + get_mtx_index(1,0,nr_mtx_columns));    *(hmm.transitions + get_mtx_index(0, 3, hmm.nr_v)) = SI_PROB;    *(hmm.transitions + get_mtx_index(0, 1, hmm.nr_v)) = (double)sd / (double)s_tot *(1.0 - SI_PROB);    *(hmm.transitions + get_mtx_index(0, 2, hmm.nr_v)) = (double)sm / (double)s_tot *(1.0 - SI_PROB);    /* for all dd-trans and dm-trans, calculate values */    for(i = 1; i < nr_mtx_columns - 1; i++) {      d_tot = *(transitions + get_mtx_index(4,i,nr_mtx_columns)) + *(transitions + get_mtx_index(5,i,nr_mtx_columns));      dd = *(transitions + get_mtx_index(4,i,nr_mtx_columns));      dm = *(transitions + get_mtx_index(5,i,nr_mtx_columns));      *(hmm.transitions + get_mtx_index(3 * (i-1) + 1, 3 * i + 1, hmm.nr_v)) = (double)dd / (double)d_tot;      *(hmm.transitions + get_mtx_index(3 * (i-1) + 1, 3 * i + 2, hmm.nr_v)) = (double)dm / (double)d_tot;    }    /* for all mm-trans, calculate values */    /* for all md-trans, calculate values */    /* for all mi-trans, calculate values */    for(i = 1; i < nr_mtx_columns - 1; i++) {      m_tot = *(transitions + get_mtx_index(1,i,nr_mtx_columns)) + *(transitions + get_mtx_index(2,i,nr_mtx_columns)) +	*(transitions + get_mtx_index(3,i,nr_mtx_columns));      mm = *(transitions + get_mtx_index(1,i,nr_mtx_columns));      md = *(transitions + get_mtx_index(2,i,nr_mtx_columns));      mi = *(transitions + get_mtx_index(3,i,nr_mtx_columns));      *(hmm.transitions + get_mtx_index(3 * (i-1) + 2, 3 * i + 2, hmm.nr_v)) = (double)mm / (double)m_tot;      *(hmm.transitions + get_mtx_index(3 * (i-1) + 2, 3 * i + 1, hmm.nr_v)) = (double)md / (double)m_tot;      *(hmm.transitions + get_mtx_index(3 * (i-1) + 2, 3 * i + 3, hmm.nr_v)) = (double)mi / (double)m_tot;    }    *(hmm.transitions + get_mtx_index((nr_mtx_columns-2)*3 + 2, (nr_mtx_columns-1)*3 + 2, hmm.nr_v)) = 1.0 - EI_PROB;;    *(hmm.transitions + get_mtx_index((nr_mtx_columns-2)*3 + 2, (nr_mtx_columns-1)*3 + 1, hmm.nr_v)) = EI_PROB;    /* for all ii-trans, calculate values */    /* for all im-trans, calculate values */    for(i = 1; i < nr_mtx_columns; i++) {      i_tot = *(transitions + get_mtx_index(6,i,nr_mtx_columns)) + *(transitions + get_mtx_index(7,i,nr_mtx_columns));      ii = *(transitions + get_mtx_index(6,i,nr_mtx_columns));      im = *(transitions + get_mtx_index(7,i,nr_mtx_columns));      *(hmm.transitions + get_mtx_index(3 * (i-1) + 3, 3 * i - 1 , hmm.nr_v)) = (double)ii / (double)i_tot;      *(hmm.transitions + get_mtx_index(3 * (i-1) + 3, 3 * (i-1) + 3, hmm.nr_v)) = (double)im / (double)i_tot;    }    transprob_ii = exp(log(0.5) / INSERT_SIZE);    *(hmm.transitions + get_mtx_index(3, 2, hmm.nr_v)) = 1.0 - transprob_ii;    *(hmm.transitions + get_mtx_index(3, 3, hmm.nr_v)) = transprob_ii;    *(hmm.transitions + get_mtx_index(nr_mtx_columns*3 - 2, nr_mtx_columns*3 - 1, hmm.nr_v)) = 1.0 - transprob_ii;    *(hmm.transitions + get_mtx_index(nr_mtx_columns*3 - 2, nr_mtx_columns*3 - 2, hmm.nr_v)) = transprob_ii;  }  /* recalculate insert-emissions for columns where alignment give a hint to the distribution */  if(use_insert_matrix == YES) {    rewind(seq_infile);    while((fgets(row, tot_nr_chars, seq_infile)) != NULL) {      i = 0;      j = 0;      while(1) {	if(row[2*i+1] == '>') {	  break;	}	if(i >= (*(transitions + j)) - 1) {	  j++;	  i++;	  continue;	}	else if(row[2*i+1] == ' ' || row[2*i+1] == '_' || row[2*i+1] == '.' || row[2*i+1] == '-') {	  i++;	}	else {	  if((a_index = get_alphabet_index_single(hmm.alphabet, row[2*i+1], hmm.a_size)) < 0) {	    if((a_index = get_replacement_letter_index_single(&(row[2*i+1]), &replacement_letters)) < 0) {	      i++;	    }	    else {	      printf("letter '%c' is not in alphabet\n",row[2*i+1]);	      exit(0);	    }	  }	  else {	    *(insert_emissions + get_mtx_index(j, a_index, hmm.a_size+1)) += 1.0;	    *(insert_emissions + get_mtx_index(j, hmm.a_size, hmm.a_size+1)) += 1.0;	    i++;	  }	}      }    }        for(i = 1; i < nr_mtx_columns; i++) {      priorize_shares(i, insert_emissions, prior_scaler);            for(j = 0; j < hmm.a_size; j++) {	*(hmm.emissions + get_mtx_index(i*3,j,hmm.a_size)) = *(insert_emissions + get_mtx_index(i-1,j,hmm.a_size+1));      }    }  }  /* cleanup and return */  free(transitions);  free(insert_emissions);  free(row);  return;}void add_replacement_letter(char cur){}int read_priorfile(FILE *priorfile){  int j,k;  double q_value, alpha_value, alpha_sum, logbeta;  char s[2048];  char ps[2048];  char *file_name;  char *pri;    rewind(priorfile);  /* put default name */  strcpy(em_di.name, "default");  /* put nr of components in struct */  if(fgets(ps, 2048, priorfile) != NULL) {  }  else {    return -1;  }  while(*ps == '#') {    if(fgets(ps, 2048, priorfile) != NULL) {    }    else {      return -1;    }  }  em_di.nr_components = atoi(&ps[0]);    /* read 2 empty lines */  if(fgets(ps, 2048, priorfile) != NULL) {  }  if(fgets(ps, 2048, priorfile) != NULL) {  }    /* allocate memory for arrays and matrix to this prior struct */  em_di.q_values = malloc_or_die(em_di.nr_components *				 sizeof(double));  em_di.alpha_sums = malloc_or_die(em_di.nr_components *				   sizeof(double));  em_di.logbeta_values =    malloc_or_die(em_di.nr_components * sizeof(double));  em_di.prior_values = malloc_or_die(em_di.nr_components *				     hmm.a_size * sizeof(double));    for(j = 0; j < em_di.nr_components; j++) {    /* put q-value in array  */    if(fgets(ps, 2048, priorfile) != NULL) {      q_value = atof(&ps[0]);      *(em_di.q_values + j) = q_value; #ifdef DEBUG_PRI      printf("q_value = %f\n", *(em_di.q_values + j));#endif    }        /* put alpha-values of this component in matrix */    alpha_sum = 0.0;    k = 0;    if(fgets(ps, 2048, priorfile) != NULL) {      pri = &ps[0];      for(k = 0; k < hmm.a_size; k++) {	alpha_value = strtod(pri, &pri);	alpha_sum += alpha_value;	*((em_di.prior_values) +	  get_mtx_index(j, k, hmm.a_size)) = alpha_value;      }    }        /* put sum of alphavalues in array */    *((em_di.alpha_sums) + j) = alpha_sum;         /* calculate logB(alpha) for this compoment, store in array*/    logbeta = 0;    for(k = 0; k < hmm.a_size; k++) {      logbeta += lgamma(*(em_di.prior_values +			  get_mtx_index(j, k, hmm.a_size)));      #ifdef DEBUG_PRI      printf("prior_value = %f\n", *((em_di.prior_values) +				     get_mtx_index(j, k, hmm.a_size)));      printf("lgamma_value = %f\n", lgamma(*((em_di.prior_values) +					     get_mtx_index(j, k, hmm.a_size))));#endif    }    logbeta = logbeta - lgamma(*(em_di.alpha_sums + j));    *(em_di.logbeta_values + j) = logbeta;        /* read 3 empty lines */    if(fgets(ps, 2048, priorfile) != NULL) {    }    if(fgets(ps, 2048, priorfile) != NULL) {    }    if(fgets(ps, 2048, priorfile) != NULL) {    }  }  #ifdef DEBUG_PRI  dump_prior_struct(em_di);  #endif}int priorize_shares(int pos, double *insert_emissions, int prior_scaler){  int nr_components, comps, a_index;   double occ_sums;  double q_value, scaling_factor, X_sum, *X_values, ed_res1, *logbeta_an_values;  double exponent, prior_prob, tot_prior_prob;   /************the update part ********************/  nr_components = em_di.nr_components;  logbeta_an_values = malloc_or_die(nr_components * sizeof(double));  scaling_factor = -FLT_MAX;  X_sum = 0.0;  X_values = malloc_or_die(hmm.a_size * sizeof(double));    /* calculate logB(alpha + n) for all components +   * calculate scaling factor for logB(alpha + n) - logB(alpha) */  for(comps = 0; comps < nr_components; comps++) {    ed_res1 = 0;    occ_sums = 0;    for(a_index = 0; a_index < hmm.a_size; a_index++) {      ed_res1 += lgamma(*(em_di.prior_values +			  get_mtx_index(comps, a_index, hmm.a_size)) +			*(insert_emissions + get_mtx_index(pos-1,a_index,hmm.a_size+1)));      occ_sums += *(insert_emissions + get_mtx_index(pos-1,a_index,hmm.a_size+1));    }    ed_res1 = ed_res1 - lgamma(*(em_di.alpha_sums + comps) + (double)(occ_sums));    *(logbeta_an_values + comps) = ed_res1;    if((ed_res1 = ed_res1 - *(em_di.logbeta_values + comps)) > scaling_factor) {      scaling_factor = ed_res1;    }  }    /* calculate all the Xi's */  for(a_index = 0; a_index < hmm.a_size; a_index++) {    *(X_values + a_index) = 0;    for(comps = 0; comps < nr_components; comps++) {      q_value = *(em_di.q_values + comps);      exponent = (*(logbeta_an_values + comps) - *(em_di.logbeta_values + comps) - 		  scaling_factor);      prior_prob = (*(em_di.prior_values + get_mtx_index(comps,a_index, hmm.a_size)) * (double)(prior_scaler) +		    *(insert_emissions + get_mtx_index(pos-1,a_index,hmm.a_size+1)));      tot_prior_prob = (*(em_di.alpha_sums + comps) + (double)(occ_sums));      *(X_values + a_index) += q_value * exp(exponent) * prior_prob / tot_prior_prob;#ifdef DEBUG_PRI      printf("\nscaling factor = %f\n", scaling_factor);      printf("a_index = %d\n", a_index);      printf("q_value = %f\n", q_value);      printf("exponent = %f\n", exponent);      printf("prior_prob = %f\n", prior_prob);      printf("tot_prior_prob = %f\n\n", tot_prior_prob);#endif    }    X_sum += *(X_values + a_index);  }    /* update share values */  for(a_index = 0; a_index < hmm.a_size; a_index++) {    ed_res1 = *(X_values + a_index) / X_sum;    if(ed_res1 != 0.0) {      *(insert_emissions + get_mtx_index(pos-1,a_index,hmm.a_size+1)) = ed_res1;    }    else {      *(insert_emissions + get_mtx_index(pos-1,a_index,hmm.a_size+1)) = ed_res1;    }  }    /* cleanup */  free(logbeta_an_values);  free(X_values);}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -