⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 std_funcs.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 5 页
字号:
  /* get sequence data */  j = 0;  for(i = msa_seq_infop->msa_seq_length - 1; i >= 0; i--) {    memcpy(reverse_msa_seq_infop->msa_seq + (i * (a_size + 1)),					     msa_seq_infop->msa_seq + (j * (a_size + 1)),								       sizeof(struct msa_letter_s) * (a_size+1));    j++;  }    /* get gap data (not implemented) */  for(i = 0; i < msa_seq_infop->msa_seq_length; i++) {    *(reverse_msa_seq_infop->gaps + (msa_seq_infop->msa_seq_length + i)) = END;  }  for(i = 0; i < msa_seq_infop->msa_seq_length; i++) {    *(reverse_msa_seq_infop->gaps + i) = (int*)(reverse_msa_seq_infop->gaps + (msa_seq_infop->msa_seq_length + i));  }  /* get gap shares */  j = 0;  for(i = msa_seq_infop->msa_seq_length - 1; i >= 0; i--) {    memcpy(reverse_msa_seq_infop->gap_shares + i, msa_seq_infop->gap_shares + j, 1 * sizeof(double));    j++;  }  /* get lead_columns */  j = 0;  for(i = msa_seq_infop->nr_lead_columns - 1; i >= 0; i--) {    *(reverse_msa_seq_infop->lead_columns_start + i) = (msa_seq_infop->msa_seq_length - 1) - *(msa_seq_infop->lead_columns_start + j);    j++;  }  *(reverse_msa_seq_infop->lead_columns_start + msa_seq_infop->nr_lead_columns) = END;#ifdef DEBUG_REVERSE_SEQ  dump_msa_seqs(msa_seq_infop, a_size);  dump_msa_seqs(reverse_msa_seq_infop, a_size);#endif}void get_reverse_msa_seq_multi(struct msa_sequences_multi_s *msa_seq_infop, struct msa_sequences_multi_s *reverse_msa_seq_infop,			       struct hmm_multi_s *hmmp){  /* note that this function does not implement the gaps-pointing, everything points to END */    int i,j;  int nr_alphabets;  int a_size, a_size_2, a_size_3, a_size_4;  nr_alphabets = hmmp->nr_alphabets;  a_size = hmmp->a_size;  a_size_2 = hmmp->a_size_2;  a_size_3 = hmmp->a_size_3;  a_size_4 = hmmp->a_size_4;    reverse_msa_seq_infop->nr_seqs = msa_seq_infop->nr_seqs;  reverse_msa_seq_infop->msa_seq_length = msa_seq_infop->msa_seq_length;  reverse_msa_seq_infop->nr_lead_columns = msa_seq_infop->nr_lead_columns;  reverse_msa_seq_infop->msa_seq_1 = (struct msa_letter_s*)(malloc_or_die(msa_seq_infop->msa_seq_length * (a_size + 1) *									sizeof(struct msa_letter_s)));  if(nr_alphabets > 1) {    reverse_msa_seq_infop->msa_seq_2 = (struct msa_letter_s*)(malloc_or_die(msa_seq_infop->msa_seq_length * (a_size_2 + 1) *									    sizeof(struct msa_letter_s)));  }  if(nr_alphabets > 2) {    reverse_msa_seq_infop->msa_seq_3 = (struct msa_letter_s*)(malloc_or_die(msa_seq_infop->msa_seq_length * (a_size_3 + 1) *									    sizeof(struct msa_letter_s)));  }  if(nr_alphabets > 3) {    reverse_msa_seq_infop->msa_seq_4 = (struct msa_letter_s*)(malloc_or_die(msa_seq_infop->msa_seq_length * (a_size_4 + 1) *									    sizeof(struct msa_letter_s)));  }  reverse_msa_seq_infop->gaps = (int**)(malloc_or_die(msa_seq_infop->msa_seq_length * sizeof(int*) +						      msa_seq_infop->msa_seq_length * sizeof(int)));  reverse_msa_seq_infop->lead_columns_start = (int*)(malloc_or_die((msa_seq_infop->nr_lead_columns +1)* sizeof(int)));  reverse_msa_seq_infop->lead_columns_end = reverse_msa_seq_infop->lead_columns_start + (msa_seq_infop->nr_lead_columns);  reverse_msa_seq_infop->gap_shares = (double*)(malloc_or_die(msa_seq_infop->msa_seq_length * sizeof(double)));    /* get sequence data */  j = 0;  for(i = msa_seq_infop->msa_seq_length - 1; i >= 0; i--) {    memcpy(reverse_msa_seq_infop->msa_seq_1 + (i * (a_size + 1)),	   msa_seq_infop->msa_seq_1 + (j * (a_size + 1)),	   sizeof(struct msa_letter_s) * (a_size+1));    j++;  }  if(nr_alphabets > 1) {    j = 0;    for(i = msa_seq_infop->msa_seq_length - 1; i >= 0; i--) {      memcpy(reverse_msa_seq_infop->msa_seq_2 + (i * (a_size_2 + 1)),	     msa_seq_infop->msa_seq_2 + (j * (a_size_2 + 1)),	     sizeof(struct msa_letter_s) * (a_size_2+1));      j++;    }  }  if(nr_alphabets > 2) {    j = 0;    for(i = msa_seq_infop->msa_seq_length - 1; i >= 0; i--) {      memcpy(reverse_msa_seq_infop->msa_seq_3 + (i * (a_size_3 + 1)),	     msa_seq_infop->msa_seq_3 + (j * (a_size_3 + 1)),	     sizeof(struct msa_letter_s) * (a_size_3+1));      j++;    }  }  if(nr_alphabets > 3) {    j = 0;    for(i = msa_seq_infop->msa_seq_length - 1; i >= 0; i--) {      memcpy(reverse_msa_seq_infop->msa_seq_4 + (i * (a_size_4 + 1)),	     msa_seq_infop->msa_seq_4 + (j * (a_size_4 + 1)),	     sizeof(struct msa_letter_s) * (a_size_4+1));    j++;    }  }  /* get gap data (not implemented) */  for(i = 0; i < msa_seq_infop->msa_seq_length; i++) {    *(reverse_msa_seq_infop->gaps + (msa_seq_infop->msa_seq_length + i)) = END;  }  for(i = 0; i < msa_seq_infop->msa_seq_length; i++) {    *(reverse_msa_seq_infop->gaps + i) = (int*)(reverse_msa_seq_infop->gaps + (msa_seq_infop->msa_seq_length + i));  }  /* get gap shares */  j = 0;  for(i = msa_seq_infop->msa_seq_length - 1; i >= 0; i--) {    memcpy(reverse_msa_seq_infop->gap_shares + i, msa_seq_infop->gap_shares + j, 1 * sizeof(double));    j++;  }  /* get lead_columns */  j = 0;  for(i = msa_seq_infop->nr_lead_columns - 1; i >= 0; i--) {    *(reverse_msa_seq_infop->lead_columns_start + i) = (msa_seq_infop->msa_seq_length - 1) - *(msa_seq_infop->lead_columns_start + j);    j++;  }  *(reverse_msa_seq_infop->lead_columns_start + msa_seq_infop->nr_lead_columns) = END;#ifdef DEBUG_REVERSE_SEQ  dump_msa_seqs(msa_seq_infop, a_size);  dump_msa_seqs(reverse_msa_seq_infop, a_size);#endif}void get_msa_labels(FILE *labelfile, struct msa_sequences_s *msa_seq_infop, struct hmm_s *hmmp) {  char row[30000];  int i,j;  rewind(labelfile);  while(1) {    if(fgets(row,30000,labelfile) != NULL) {      if(row[0] == '/') {	for(i = 1; row[i] != '/';i++) {	  (msa_seq_infop->msa_seq + (*(msa_seq_infop->lead_columns_start + i-1)) * (hmmp->a_size+1))->label = row[i];	}      }    }    else {      break;    }  }}void get_msa_labels_all_columns(FILE *labelfile, struct msa_sequences_s *msa_seq_infop, struct hmm_s *hmmp) {  char row[30000];  int i,j;  rewind(labelfile);  while(1) {    if(fgets(row,30000,labelfile) != NULL) {      if(row[0] == '/') {	for(i = 1; row[i] != '/';i++) {	  (msa_seq_infop->msa_seq + (i-1) * (hmmp->a_size+1))->label = row[i];	}      }    }    else {      break;    }  }}int update_shares_prior(struct emission_dirichlet_s *em_di, struct hmm_s *hmmp,			struct msa_sequences_s *msa_seq_infop, int l){  int nr_components, comps, a_index;   double occ_sums;  double q_value, scaling_factor, X_sum, *X_values, ed_res1, *logbeta_an_values;  double exponent, prior_prob, tot_prior_prob;     /************the update part ********************/  nr_components = em_di->nr_components;  logbeta_an_values = malloc_or_die(nr_components * sizeof(double));  scaling_factor = -FLT_MAX;  X_sum = 0.0;  X_values = malloc_or_die(hmmp->a_size * sizeof(double));    /* calculate logB(alpha + n) for all components +   * calculate scaling factor for logB(alpha + n) - logB(alpha) */  for(comps = 0; comps < nr_components; comps++) {    ed_res1 = 0;    occ_sums = 0;    for(a_index = 0; a_index < hmmp->a_size; a_index++) {      ed_res1 += lgamma(*(em_di->prior_values +			  get_mtx_index(comps, a_index, hmmp->a_size)) +			(double)((msa_seq_infop->msa_seq +				 get_mtx_index(l,a_index,hmmp->a_size+1))->nr_occurences));      occ_sums += (msa_seq_infop->msa_seq + get_mtx_index(l,a_index, hmmp->a_size+1))->nr_occurences;    }    ed_res1 = ed_res1 - lgamma(*(em_di->alpha_sums + comps) + (double)(occ_sums));    *(logbeta_an_values + comps) = ed_res1;    if((ed_res1 = ed_res1 - *(em_di->logbeta_values + comps)) > scaling_factor) {      scaling_factor = ed_res1;    }  }    /* calculate all the Xi's */  for(a_index = 0; a_index < hmmp->a_size; a_index++) {    *(X_values + a_index) = 0;    for(comps = 0; comps < nr_components; comps++) {      q_value = *(em_di->q_values + comps);      exponent = (*(logbeta_an_values + comps) - *(em_di->logbeta_values + comps) - 		  scaling_factor);      prior_prob = (*(em_di->prior_values + get_mtx_index(comps,a_index, hmmp->a_size)) +		    (double)((msa_seq_infop->msa_seq +			     get_mtx_index(l,a_index,hmmp->a_size+1))->nr_occurences));      tot_prior_prob = (*(em_di->alpha_sums + comps) + (double)(occ_sums));      *(X_values + a_index) += q_value * exp(exponent) * prior_prob / tot_prior_prob;#ifdef DEBUG_PRI      printf("\nscaling factor = %f\n", scaling_factor);      printf("a_index = %d\n", a_index);      printf("q_value = %f\n", q_value);      printf("exponent = %f\n", exponent);      printf("prior_prob = %f\n", prior_prob);      printf("tot_prior_prob = %f\n\n", tot_prior_prob);#endif    }    X_sum += *(X_values + a_index);  }    /* update share values */  for(a_index = 0; a_index < hmmp->a_size; a_index++) {    ed_res1 = *(X_values + a_index) / X_sum;    if(ed_res1 != 0.0) {      (msa_seq_infop->msa_seq + get_mtx_index(l, a_index, hmmp->a_size+1))->prior_share = ed_res1;    }    else {      (msa_seq_infop->msa_seq + get_mtx_index(l, a_index, hmmp->a_size+1))->prior_share = ed_res1;    }  }    /* cleanup */  free(logbeta_an_values);  free(X_values);}int replacement_letter(struct letter_s *cur_letterp, struct replacement_letter_s *replacement_letters, 		       struct msa_sequences_s *msa_seq_infop, struct hmm_s *hmmp, int seq_pos){  int i,j,k;  struct letter_s *repl_letter;  int same_letter;  /* find out if letter in cur_letterp is a replacement_letter */  for(i = 0; i < replacement_letters->nr_rl; i++) {    repl_letter = replacement_letters->letters + i;    same_letter = YES;    j = 0;    while(*(repl_letter->letter + j) != '\0' && *(cur_letterp->letter + j) != '\0') {      if(*(repl_letter->letter + j) == *(cur_letterp->letter + j)) {      }      else {	same_letter = NO;      }      j++;    }    if(*(repl_letter->letter + j) != '\0' || *(cur_letterp->letter + j) != '\0') {      same_letter = NO;    }    else if(same_letter == YES) {	break;    }  }  if(same_letter == NO) {    return NO;  }  else { /* k represents the regular letter, i represents which repl_letter this is */     for(k = 0; k < hmmp->a_size; k++) {      (msa_seq_infop->msa_seq + get_mtx_index(seq_pos,k, hmmp->a_size+1))->nr_occurences += 	*(replacement_letters->probs + get_mtx_index(i,k,hmmp->a_size));    }    return YES;  }  }void get_labels_multi(FILE *labelfile, struct sequences_multi_s *seq_infop, struct hmm_multi_s *hmmp, int seq_nr) {  char row[30000];  int i,j;  rewind(labelfile);  while(1) {    if(fgets(row,30000,labelfile) != NULL) {      if(row[0] == '/') {	for(i = 1; row[i] != '/';i++) {	  ((seq_infop->seqs + seq_nr)->seq_1 + (i-1))->label = row[i];	  if(hmmp->nr_alphabets > 1) {	    ((seq_infop->seqs + seq_nr)->seq_2 + (i-1))->label = row[i];	  }	  if(hmmp->nr_alphabets > 2) {	    ((seq_infop->seqs + seq_nr)->seq_3 + (i-1))->label = row[i];	  }	  if(hmmp->nr_alphabets > 3) {	    ((seq_infop->seqs + seq_nr)->seq_4 + (i-1))->label = row[i];	  }	}      }    }    else {      break;    }  }  rewind(labelfile);}void get_msa_labels_multi(FILE *labelfile, struct msa_sequences_multi_s *msa_seq_infop, struct hmm_multi_s *hmmp) {  char row[30000];  int i,j;  rewind(labelfile);  while(1) {    if(fgets(row,30000,labelfile) != NULL) {      if(row[0] == '/') {	for(i = 1; row[i] != '/';i++) {	  (msa_seq_infop->msa_seq_1 + (*(msa_seq_infop->lead_columns_start + i-1)) * (hmmp->a_size+1))->label = row[i];	  if(hmmp->nr_alphabets > 1) {	    (msa_seq_infop->msa_seq_2 + (*(msa_seq_infop->lead_columns_start + i-1)) * (hmmp->a_size_2+1))->label = row[i];	  }	  if(hmmp->nr_alphabets > 2) {	    (msa_seq_infop->msa_seq_3 + (*(msa_seq_infop->lead_columns_start + i-1)) * (hmmp->a_size_3+1))->label = row[i];	  }	  if(hmmp->nr_alphabets > 3) {	    (msa_seq_infop->msa_seq_4 + (*(msa_seq_infop->lead_columns_start + i-1)) * (hmmp->a_size_4+1))->label = row[i];	  }	}      }    }    else {      break;    }  }}void get_msa_labels_all_columns_multi(FILE *labelfile, struct msa_sequences_multi_s *msa_seq_infop, struct hmm_multi_s *hmmp) {  char row[30000];  int i,j;  rewind(labelfile);  while(1) {    if(fgets(row,30000,labelfile) != NULL) {      if(row[0] == '/') {	for(i = 1; row[i] != '/';i++) {	  (msa_seq_infop->msa_seq_1 + (i-1) * (hmmp->a_size+1))->label = row[i];	  if(hmmp->nr_alphabets > 1) {	    (msa_seq_infop->msa_seq_2 + (i-1) * (hmmp->a_size_2+1))->label = row[i];	  }	  if(hmmp->nr_alphabets > 2) {	    (msa_seq_infop->msa_seq_3 + (i-1) * (hmmp->a_size_3+1))->label = row[i];	  }	  if(hmmp->nr_alphabets > 3) {	    (msa_seq_infop->msa_seq_4 + (i-1) * (hmmp->a_size_4+1))->label = row[i];	  }	}      }    }    else {      break;    }  }}int update_shares_prior_multi(struct emission_dirichlet_s *em_di, struct hmm_multi_s *hmmp,			struct msa_sequences_multi_s *msa_seq_infop, int l, int alphabet){  int nr_components, comps, a_index;   double occ_sums;  double q_value, scaling_factor, X_sum, *X_values, ed_res1, *logbeta_an_values;  double exponent, prior_prob, tot_prior_prob;  int a_size;  struct msa_letter_s *msa_seq;  /* check if this alphabet has a prior */  if(em_di->nr_components <= 0) {    //printf("em_di nr comps = %d\n",em_di->nr_components);    //printf("alphabet = %d\n", alphabet);    return;  }    /* set a_size and msa_seq according to alphabet */  if(alphabet == 1) {    a_size = hmmp->a_size;    msa_seq = msa_seq_infop->msa_seq_1;  }  if(alphabet == 2) {    a_size = hmmp->a_size_2;    msa_seq = msa_seq_infop->msa_seq_2;  }  if(alphabet == 3) {    a_size = hmmp->a_size_3;    msa_seq = msa_seq_infop->msa_seq_3;   }  if(alphabet == 4) {    a_size = hmmp->a_size_4;    msa_seq = msa_seq_infop->msa_seq_4;  }   /************the update part ********************/  nr_components = em_di->nr_components;  logbeta_an_values = malloc_or_die(nr_components * sizeof(double));  scaling_factor = -FLT_MAX;  X_sum = 0.0;  X_values = malloc_or_die(a_size * sizeof(double));    /* calculate logB(alpha + n) for all components +   * calculate scaling factor for logB(alpha + n) - logB(alpha) */  for(comps = 0; comps < nr_components; comps++) {    ed_res1 = 0;    occ_sums = 0;    for(a_index = 0; a_index < a_size; a_index++) {      ed_res1 += lgamma(*(em_di->prior_values +			  get_mtx_index(comps, a_index, a_size)) +			(double)((msa_seq +				 get_mtx_index(l,a_index,a_size+1))->nr_occurences));      occ_sums += (msa_seq + get_mtx_index(l,a_index, a_size+1))->nr_occurences;    }    ed_res1 = ed_res1 - lgamma(*(em_di->alpha_sums + comps) + (double)(occ_sums));    *(logbeta_an_values + comps) = ed_res1;    if((ed_res1 = ed_res1 - *(em_di->logbeta_values + comps)) > scaling_factor) {      scaling_factor = ed_res1;    }  }    /* calculate all the Xi's */  for(a_index = 0; a_index < a_size; a_index++) {    *(X_values + a_index) = 0;    for(comps = 0; comps < nr_components; comps++) {      q_value = *(em_di->q_values + comps);      exponent = (*(logbeta_an_values + comps) - *(em_di->logbeta_values + comps) - 		  scaling_factor);      prior_prob = (*(em_di->prior_values + get_mtx_index(comps,a_index, a_size)) +		    (double)((msa_seq +			     get_mtx_index(l,a_index,a_size+1))->nr_occurences));      tot_prior_prob = (*(em_di->alpha_sums + comps) + (double)(occ_sums));      *(X_values + a_index) += q_value * exp(exponent) * prior_prob / tot_prior_prob;#ifdef DEBUG_PRI      printf("\nscaling factor = %f\n", scaling_factor);      printf("a_index = %d\n", a_index);      printf("q_value = %f\n", q_value);      printf("exponent = %f\n", exponent);      printf("prior_prob = %f\n", prior_prob);      printf("tot_prior_prob = %f\n\n", tot_prior_prob);#endif    }    X_sum += *(X_values + a_index);  }    /* update share values */  //printf("a_size = %d\n",a_size);  for(a_index = 0; a_index < a_size; a_index++) {    ed_res1 = *(X_values + a_index) / X_sum;    if(ed_res1 != 0.0) {      (msa_seq + get_mtx_index(l, a_index, a_size+1))->prior_share = ed_res1;    }    else {      (msa_seq + get_mtx_index(l, a_index, a_size+1))->prior_share = ed_res1;    }    //printf("msa_seq = %f\n",(msa_seq + get_mtx_index(l, a_index, a_size+1))->prior_share);  }    //printf("returning\n");  /* cleanup */  free(logbeta_an_values);  free(X_values);}int replacement_letter_multi(struct letter_s *cur_letterp, struct replacement_letter_multi_s *replacement_letters, 		       struct msa_sequences_multi_s *msa_seq_infop, struct hmm_multi_s *hmmp, int seq_pos, int alphabet){  int i,j,k;  struct letter_s *repl_letter;  int same_letter;    /* find out if letter in cur_letterp is a replacement_letter */  if(alphabet == 1) {    if(replacement_letters->nr_rl_1 <= 0) {      return NO;    }    for(i = 0; i < replacement_letters->nr_rl_1; i++) {      repl_letter = replacement_letters->letters_1 + i;      same_le

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -