⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 set_profile_hmm_parameters.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 2 页
字号:
#include <stdio.h>#include <string.h>#include <stdlib.h>#include <math.h>#include <float.h>#include "structs.h"#include "funcs.h"#define STD_CAL 1#define EXT_CAL 2#define NEW 1#define ACID 2#define GAP 3#define INSERT_ACID 4#define INSERT_GAP 5#define SI_PROB 0.33#define EI_PROB 0.5extern int verbose;void calibrate_hmm_std();void calibrate_hmm_ext(FILE *seq_infile, int prior_scaler);void add_replacement_letter(char cur);int read_priorfile(FILE *priorfile);int priorize_shares(int pos, double *insert_emissions, int prior_scaler);struct hmm_s hmm;struct msa_sequences_s msa_seq_info;struct replacement_letter_s replacement_letters;struct emission_dirichlet_s em_di;int main(int argc, char* argv[]){  FILE *hmm_infile = NULL;  FILE *hmm_outfile = NULL;  FILE *seq_infile = NULL;  FILE *replfile = NULL;  FILE *priorfile = NULL;  int calibration_method = STD_CAL;  int lead_seq = 1;  int i = 0;  int prior_scaler = 1;  if(argc > 1) {    for(i = 1; i < argc; i++) {      if(strcmp(argv[i], "-ext") == 0) {	i++;	if((seq_infile = fopen(argv[i], "r")) == NULL) {	  perror(argv[i]);	  printhelp_cal();	  exit(0);	}	else {	  calibration_method = EXT_CAL;	  printf("Opened file %s for reading sequences\n",argv[i]);	}      }      else if(strcmp(argv[i], "-std") == 0) {	calibration_method = STD_CAL;      }      else if(strcmp(argv[i], "-hmm") == 0) {	i++;	if((hmm_infile = fopen(argv[i], "r")) == NULL) {	  perror(argv[i]);	  printhelp_cal();	  exit(0);	}	else {	  printf("Opened file %s for reading hmm\n",argv[i]);	}      }      else if(strcmp(argv[i], "-o") == 0) {	i++;	if((hmm_outfile = fopen(argv[i], "w")) == NULL) {	  perror(argv[i]);	  printhelp_cal();	  exit(0);	}	else {	  printf("Opened file %s for writing\n",argv[i]);	}      }      else if(strcmp(argv[i], "-pri") == 0) {	i++;	if((priorfile = fopen(argv[i], "r")) == NULL) {	  perror(argv[i]);	  printhelp_cal();	  exit(0);	}	else {	  printf("Opened file %s for reading priorfiles\n",argv[i]);	}      }      else if(strcmp(argv[i], "-prisc") == 0) {	i++;	prior_scaler = atoi(argv[i]);	if(prior_scaler <= 0) {	  printf("Incorrect option: %s\n", argv[i]);	  perror(argv[i]);	  printhelp_cal();	  exit(0);	}      }      else if(strcmp(argv[i], "-r") == 0) {	i++;	if((replfile = fopen(argv[i], "r")) == NULL) {	  	  perror(argv[i]);	  printhelp_cal();	  exit(0);	}	else {	  printf("Opened file %s for reading replacement letters\n",argv[i]);	}      }      else if(strcmp(argv[i], "-lead") == 0) {	i++;	lead_seq = atoi(argv[i]);	if(lead_seq <= 0) {	  printf("Incorrect option: %s\n", argv[i]);	  perror(argv[i]);	  printhelp_cal();	  exit(0);	}      }      else if(strcmp(argv[i], "-verbose") == 0) {	verbose = YES;      }      else if(strcmp(argv[i], "-h") == 0) {	printhelp_cal();	exit(0);      }    }    /* read hmm */    if(hmm_infile != NULL) {      readhmm(hmm_infile, &hmm);    }    else {      printhelp_cal();      exit(0);    }    /* get replacement letters */    if(replfile != NULL) {      get_replacement_letters(replfile, &replacement_letters);    }    else {      replacement_letters.nr_rl = 0;    }        /* read priorfile */    if(priorfile != NULL) {      read_priorfile(priorfile);    }    if(calibration_method == STD_CAL) {      calibrate_hmm_std();    }    else if(calibration_method == EXT_CAL) {      calibrate_hmm_ext(seq_infile, prior_scaler);    }    if(hmm_outfile != NULL) {      savehmm(hmm_outfile, &hmm);    }    else {      savehmm(fopen("a.hmg", "w"), &hmm);    }  }    /* cleanup */  if(priorfile != NULL) {    free(em_di.q_values);    free(em_di.alpha_sums);    free(em_di.logbeta_values);    free(em_di.prior_values);    fclose(priorfile);  }}void calibrate_hmm_std(){  /* delete - match - insert */  double GAP_SIZE = 7.0;  double INSERT_SIZE = 3.0;  double transprob_dd, transprob_ii;  int i;  /* calibrate delete states */  transprob_dd = exp(log(0.5) / GAP_SIZE);  for(i = 1; i < hmm.nr_v - 4; i += 3) {    /* update transitions to next delete state and next match state */    *(hmm.transitions + get_mtx_index(i, i+3, hmm.nr_v)) = transprob_dd;    *(hmm.transitions + get_mtx_index(i, i+4, hmm.nr_v)) = 1.0 - transprob_dd;  }  transprob_ii = exp(log(0.5) / INSERT_SIZE);  for(i = 3; i < hmm.nr_v - 3; i += 3) {    /* update transitions to next delete state and next match state */    *(hmm.transitions + get_mtx_index(i, i, hmm.nr_v)) = transprob_ii;    *(hmm.transitions + get_mtx_index(i, i-1, hmm.nr_v)) = 1.0 - transprob_ii;  }    }void calibrate_hmm_ext(FILE *seq_infile, int prior_scaler){  char cur;  int nr_mtx_columns, nr_insert_columns;  int *transitions;  double *insert_emissions;  //double *insert_acids;  int i,j;  int a_index, is_gap, new;  int cur_msa_column, cur_mtx_column, cur_mtx_col_nr, cur_insert_column;  int last_sign, cur_sign;  int d_tot, m_tot, i_tot, s_tot, dd, dm, mm, md, mi, ii, im, sd, sm;  int use_insert_matrix;  double transprob_ii;  char *row;  int INSERT_SIZE = 25;  int tot_nr_chars;  use_insert_matrix = YES;    //printf("check 1\n");  /* read first row, count indices of occupied columns */  cur = (char)fgetc(seq_infile);  if(cur != '<') {  }  nr_mtx_columns = 0;  nr_insert_columns = 0;  tot_nr_chars = 20;  while(1) {    /* read symbol */    cur = (char)fgetc(seq_infile);    tot_nr_chars = tot_nr_chars + 2;    if(cur == '>') {      break;    }    else if(cur == ' ' || cur == '-' || cur == '_' || cur == '.') {      if(new == YES) {	nr_insert_columns++;	new = NO;      }    }    else {      nr_mtx_columns++;      new = YES;    }    /* read ';' */    cur = (char)fgetc(seq_infile);    if(cur != ';') {          }  }  //printf("tot_nr_chars = %d\n",tot_nr_chars);  row = (char*)(malloc_or_die(tot_nr_chars * sizeof(char)));    //printf("check 2\n");  nr_mtx_columns++;  /* allocate memory for matrices, add one (pseudocount) to each post */  transitions = (int*)malloc_or_die(8 * nr_mtx_columns * sizeof(int));  insert_emissions = (double*)malloc_or_die(nr_mtx_columns * (hmm.a_size+1) * sizeof(double));  for(i = 1; i < 8; i++) {    for(j = 0; j < nr_mtx_columns; j++) {      *(transitions + get_mtx_index(i,j,nr_mtx_columns)) = 1;    }  }    //printf("check 3\n");  /* read first row, store indices of occupied columns + add counts for first row */  rewind(seq_infile);  cur_msa_column = 1;  cur_mtx_column = 0;  fgets(row, tot_nr_chars, seq_infile);  //printf("check 4\n");  for(i = 1;;i++) {    /* read symbol */    cur = row[i];    if(cur == '>') {      break;    }    else if(cur == ' ' || cur == '-' || cur == '_' || cur == '.') {      cur_msa_column++;    }    else if(cur == ';') {          }    else {      *(transitions + cur_mtx_column) = cur_msa_column;      *(transitions + get_mtx_index(1,cur_mtx_column, nr_mtx_columns)) += 1;      cur_mtx_column++;      cur_msa_column++;    }  }    //printf("check 5\n");  /* for all rows, add counts to matrix */  while((fgets(row, tot_nr_chars, seq_infile)) != NULL) {    cur_msa_column = 1;    cur_mtx_column = 0;    cur_insert_column = 0;     last_sign = NEW;    cur_sign = NEW;    for(i = 1;;i++) {      /* read symbol */      cur_mtx_col_nr = *(transitions + cur_mtx_column);      cur = row[i];      if(cur == '>') {	break;      }      else if(cur == ' ' || cur == '-' || cur == '_' || cur == '.') {	last_sign = cur_sign;	if(cur_msa_column == cur_mtx_col_nr) /* gap */{	  if(last_sign == ACID || last_sign == NEW) {	    cur_sign = GAP;	    *(transitions + get_mtx_index(2,cur_mtx_column,nr_mtx_columns)) += 1;	  }	  else if(last_sign == GAP) {	    cur_sign = GAP;	    *(transitions + get_mtx_index(4,cur_mtx_column,nr_mtx_columns)) += 1;	  }	  else {	    	  }	  if(is_gap == YES) {	    is_gap = NO;	    cur_insert_column++;	  }	  cur_msa_column++;	  cur_mtx_column++;	}	else if(cur_msa_column < cur_mtx_col_nr) /* insert gap*/{	  if(last_sign == ACID || last_sign == NEW) {	    cur_sign = ACID;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -