⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 readseqs_multialpha.c

📁 马尔科夫模型的java版本实现
💻 C
📖 第 1 页 / 共 5 页
字号:
  msa_length = 0;  seq_length = 0;  seq_index = 0;  longest_seq = -1;  longest_seq_length = 0;  is_first = YES;    while(1) {    i = fgetc(seqfile);    if(i == '<') {      break;    }    else if(i == '#') {      break;    }    else if(i == '\s' && i == '\n') {         }    else {      printf("Sequence file does not seem to be in correct format\n");      exit(0);    }  }  rewind(seqfile);  if(priorfile_a1 == NULL) {    use_priordistribution_1 = NONE;    use_priordistribution_2 = NONE;    use_priordistribution_3 = NONE;    use_priordistribution_4 = NONE;  }  else {    read_priorfile = read_multi_prior_file_multi(&em_di_1, hmmp, priorfile_a1, 1);    if(read_priorfile > 0) {      use_priordistribution_1 = DIRICHLET;    }    else if(read_priorfile < 0) {      printf("Error: Incorrect priorfile format\n");      exit(0);    }    else {      use_priordistribution_1 = NONE;    }    if(hmmp->nr_alphabets > 1) {      read_priorfile = read_multi_prior_file_multi(&em_di_2, hmmp, priorfile_a1, 2);      if(read_priorfile > 0) {	use_priordistribution_2 = DIRICHLET;      }      else if(read_priorfile < 0) {	printf("Error: Incorrect priorfile format\n");	exit(0);      }      else {	use_priordistribution_2 = NONE;      }    }    if(hmmp->nr_alphabets > 2) {      read_priorfile = read_multi_prior_file_multi(&em_di_3, hmmp, priorfile_a1, 3);      if(read_priorfile > 0) {	use_priordistribution_3 = DIRICHLET;      }      else if(read_priorfile < 0) {	printf("Error: Incorrect priorfile format\n");	exit(0);      }      else {	use_priordistribution_3 = NONE;      }    }    if(hmmp->nr_alphabets > 3) {      read_priorfile = read_multi_prior_file_multi(&em_di_4, hmmp, priorfile_a1, 4);      if(read_priorfile > 0) {	use_priordistribution_4 = DIRICHLET;      }      else if(read_priorfile < 0) {	printf("Error: Incorrect priorfile format\n");	exit(0);      }      else {	use_priordistribution_4 = NONE;      }    }  }     if(replacement_letters->nr_alphabets == 0) {    use_replacement_letters = NO;  }  else {    use_replacement_letters = YES;  }  /* check if file is empty */  is_empty = YES;  while(done != YES) {    c = (char)fgetc(seqfile);    if((int)c == EOF) {      break;    }    else if (c == '<' || c == '#') {      is_empty = NO;      break;    }  }  if(is_empty == YES) {    if(verbose == YES) {      printf("File is empty\n");    }   }  else {  }  rewind(seqfile);  last = '!';  nr_alphabets = 0;  nr_alphabets_temp = 0;  nr_continuous_alphabets = 0;  nr_continuous_alphabets_temp = 0;  while(done != YES) {    c = (char)fgetc(seqfile);    if((int)c == EOF) {      done = YES;      continue;    }    if(c == '<' && last != '<') {      inside_seq = YES;      seq_length = 0;      if(is_first == YES) {	msa_seq_length = 0;      }      seq_index++;      nr_seqs++;      last = '<';      nr_alphabets_temp = 1;    }    else if(c == '#' && last != '#') {      inside_seq = YES;      seq_length = 0;      if(is_first == YES) {	msa_seq_length = 0;      }      seq_index++;      last = '#';      nr_alphabets_temp = 1;    }    else if(c == '<') {      nr_alphabets_temp++;    }    else if(c == '#') {      nr_alphabets_temp++;      nr_continuous_alphabets_temp++;    }    else if(c == '>') {      if(seq_length > longest_seq_length) {	longest_seq = seq_index;	longest_seq_length = seq_length;      }      inside_seq = NO;      is_first = NO;      if(nr_alphabets_temp > nr_alphabets) {	nr_alphabets = nr_alphabets_temp;	switch(nr_alphabets) {	case 1: hmmp->alphabet_type = DISCRETE; break;	case 2: hmmp->alphabet_type_2 = DISCRETE; break;	case 3: hmmp->alphabet_type_3 = DISCRETE; break;	case 4: hmmp->alphabet_type_4 = DISCRETE; break;	}      }            nr_alphabets_temp = 0;      last = '!';    }    else if(c == '+') {      if(seq_length > longest_seq_length) {	longest_seq = seq_index;	longest_seq_length = seq_length;      }      inside_seq = NO;      is_first = NO;      if(nr_alphabets_temp > nr_alphabets) {	nr_alphabets = nr_alphabets_temp;	switch(nr_alphabets) {	case 1: hmmp->alphabet_type = CONTINUOUS; break;	case 2: hmmp->alphabet_type_2 = CONTINUOUS; break;	case 3: hmmp->alphabet_type_3 = CONTINUOUS; break;	case 4: hmmp->alphabet_type_4 = CONTINUOUS; break;	}	      }      if(nr_continuous_alphabets_temp > nr_continuous_alphabets) {	nr_continuous_alphabets = nr_continuous_alphabets_temp;      }                  nr_alphabets_temp = 0;      nr_continuous_alphabets_temp = 0;      last = '!';    }    if(c == ';' && inside_seq == YES) {      if(is_first == YES) {	msa_seq_length++;      }      seq_length++;      last = '!';    }    else if(inside_seq == YES && (c == '_' || c == ' ' || c == '-' || c == '.')) {      seq_length--;      last = '!';    }  }  nr_seqs = nr_seqs / (nr_alphabets - nr_continuous_alphabets);  msa_seq_infop->nr_seqs = nr_seqs;#ifdef DEBUG_MSA_SEQ_STD  printf("reached checkpoint 1\n");  printf("msa_seq_length = %d\n", msa_seq_length);  printf("nr_alphabets = %d\n", nr_alphabets);  printf("longest seq = %d\n", longest_seq);  printf("longest_seq_length = %d\n", longest_seq_length);  printf("nr_seqs = %d\n", nr_seqs);#endif  msa_seq_infop->msa_seq_1 = (struct msa_letter_s*)    malloc_or_die(msa_seq_length * (hmmp->a_size+1) * sizeof(struct msa_letter_s));  msa_seq_infop->lead_columns_start = (int*)malloc_or_die((msa_seq_length+1) * sizeof(int));  if(hmmp->nr_alphabets > 1){    msa_seq_infop->msa_seq_2 = (struct msa_letter_s*)      malloc_or_die(msa_seq_length * (hmmp->a_size_2+1) * sizeof(struct msa_letter_s));  }  if(hmmp->nr_alphabets > 2){    msa_seq_infop->msa_seq_3 = (struct msa_letter_s*)      malloc_or_die(msa_seq_length * (hmmp->a_size_3+1) * sizeof(struct msa_letter_s));  }  if(hmmp->nr_alphabets > 3){    msa_seq_infop->msa_seq_4 = (struct msa_letter_s*)      malloc_or_die(msa_seq_length * (hmmp->a_size_4+1) * sizeof(struct msa_letter_s));  }    /* read first alphabet, save nr of occurences of each letter in each position,   * including nr of gaps */  rewind(seqfile);  seq_pos = 0;  inside_seq = NO;  seq_index = 0;  get_letter_columns = NO;  k = 0;  l = 0;  nr_lead_columns = 0;  if(lead_seq == LONGEST_SEQ) {    lead_seq = longest_seq;  }    for(m = 0; m < nr_seqs;) {    i = fgetc(seqfile);    if(i == EOF) {      break;          }    c = (char)i;    if(c == '<') {      seq_pos = 0;      inside_seq = YES;      seq_index++;      if(seq_index == lead_seq) {	get_letter_columns = YES;	get_query_letters = YES;      }    }    else if(c == '#') {      get_letter_columns = YES;      fgets(line, MAX_LINE, seqfile);      cur_line = line;      while(*cur_line == '#') {	cur_line++;      }      while(*cur_line != '+') {	if(*cur_line == '_' || *cur_line == ' ' ||	   (*cur_line == '-' && *(cur_line + 1) == ';') || *cur_line == '.') /* gap */ {	  (msa_seq_infop->msa_seq_1 + get_mtx_index(seq_pos, 0, hmmp->a_size+1))->share = 0.0;	  (msa_seq_infop->msa_seq_1 + get_mtx_index(seq_pos, 0, hmmp->a_size+1))->nr_occurences = 0.0;	  cur_line++;	}	else if(*cur_line == 'X') {	  (msa_seq_infop->msa_seq_1 + get_mtx_index(seq_pos, 0, hmmp->a_size+1))->share = 0.0;	  (msa_seq_infop->msa_seq_1 + get_mtx_index(seq_pos, 0, hmmp->a_size+1))->nr_occurences = -1.0;	  cur_line++;	}	else {	  letter_val = strtod(cur_line, &cur_line);	  (msa_seq_infop->msa_seq_1 + get_mtx_index(seq_pos,0, hmmp->a_size+1))->share = letter_val;	  (msa_seq_infop->msa_seq_1 + get_mtx_index(seq_pos,0, hmmp->a_size+1))->nr_occurences = 1.0;	  if(get_letter_columns == YES) {	    *(msa_seq_infop->lead_columns_start + k) = seq_pos; /* letter column */	    k++;	    nr_lead_columns++;	  }	}	if(*cur_line != ';') {	  printf("Strange continuous sequence file format\n");	  exit(0);	}	else {	  cur_line = cur_line + 1;	  seq_pos++;	}      }      get_letter_columns = NO;      break;    }    else if(c == '>') {      inside_seq = NO;      get_letter_columns = NO;      get_query_letters = NO;      m++;    }    else if(inside_seq == YES) {      /* reading one letter */      letter_pos = 0;      cur_letter.letter[letter_pos] = c;      letter_pos++;      while((c = (char)fgetc(seqfile)) != ';') {	cur_letter.letter[letter_pos] = c;	letter_pos++;      }      if(letter_pos > 4) {	printf("max letter size = 4 characters\n");	exit(0);      }      cur_letter.letter[letter_pos] = '\0';      if(seq_pos + 1 > msa_length) {	msa_length = seq_pos + 1; /* update nr of positions */      }      if(cur_letter.letter[0] == '_' || cur_letter.letter[0] == ' ' ||	 cur_letter.letter[0] == '-' || cur_letter.letter[0] == '.') /* gap */ {	(msa_seq_infop->msa_seq_1 + get_mtx_index(seq_pos, hmmp->a_size, hmmp->a_size+1))->nr_occurences	  += 1;	seq_pos++;      }      else /* non gap character */ {	if(use_replacement_letters == YES &&	   replacement_letter_multi(&cur_letter, replacement_letters, msa_seq_infop, hmmp, seq_pos,1) == YES){	  /* adding of nr_occurences for these letters is done on the fly in replacement_letter-function */	}	else /* add occurence of this letter */{	  a_index = get_alphabet_index(&cur_letter, hmmp->alphabet, hmmp->a_size);	  //printf("a_index = %d\n", a_index);	  if(a_index < 0) {	    printf("Could not read msa seq from file: letter '%s' is not in alphabet\n", cur_letter.letter);	    exit(0);	  }	  else {	    (msa_seq_infop->msa_seq_1 + get_mtx_index(seq_pos,a_index, hmmp->a_size+1))->nr_occurences += 1;	  }	}	if(get_letter_columns == YES) {	  *(msa_seq_infop->lead_columns_start + k) = seq_pos; /* letter column */	  k++;	  nr_lead_columns++;	}	seq_pos++;      }      if(get_query_letters == YES) {	memcpy((msa_seq_infop->msa_seq_1 + l * (hmmp->a_size + 1))->query_letter, &(cur_letter.letter),	       sizeof(char) * 5); /* query letter */	l++;      }    }    //printf("m = %d\n",m);  }  /* read second alphabet, save nr of occurences of each letter in each position,   * including nr of gaps */  if(hmmp->nr_alphabets > 1) {    seq_pos = 0;    inside_seq = NO;    seq_index = 0;    get_letter_columns = NO;    l = 0;    last = '!';    for(m = 0; m < nr_seqs;) {      i = fgetc(seqfile);      if(i == EOF) {	break;      }      c = (char)i;      //printf("alpha2:c = %c\n",c);            if(c == '<' && last != '<') {	seq_pos = 0;	inside_seq = YES;	seq_index++;	if(seq_index == lead_seq) {	  get_query_letters = YES;	}	last = c;      }      else if(c == '<') {	last = c;      }      else if(c == '#') {	fgets(line, MAX_LINE, seqfile);	cur_line = line;	while(*cur_line == '#') {	  cur_line++;	}	while(*cur_line != '+') {	  if(*cur_line == '_' || *cur_line == ' ' ||	     (*cur_line == '-' && *(cur_line + 1) == ';') || *cur_line == '.') /* gap */ {	    (msa_seq_infop->msa_seq_2 + get_mtx_index(seq_pos, 0, hmmp->a_size_2+1))->share = 0.0;	    (msa_seq_infop->msa_seq_2 + get_mtx_index(seq_pos, 0, hmmp->a_size_2+1))->nr_occurences = 0.0;	    cur_line++;	  }	  else if(*cur_line == 'X') {	    (msa_seq_infop->msa_seq_2 + get_mtx_index(seq_pos, 0, hmmp->a_size_2+1))->share = 0.0;	    (msa_seq_infop->msa_seq_2 + get_mtx_index(seq_pos, 0, hmmp->a_size_2+1))->nr_occurences = -1.0;	    cur_line++;	  }	  else {	    letter_val = strtod(cur_line, &cur_line);	    (msa_seq_infop->msa_seq_2 + get_mtx_index(seq_pos,0, hmmp->a_size_2+1))->share = letter_val;	    (msa_seq_infop->msa_seq_2 + get_mtx_index(seq_pos,0, hmmp->a_size_2+1))->nr_occurences = 1.0;	  }	  if(*cur_line != ';') {	    printf("Strange continuous sequence file format\n");	    exit(0);	  }	  else {	    cur_line = cur_line + 1;

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -