📄 dropfx.c
字号:
/* alignment store the operation that align the protein and dna sequence. The code of the number in the array is as follows: 0: delete of an amino acid. 2: frame shift, 2 nucleotides match with an amino acid 3: match an amino acid with a codon 4: the other type of frame shift 5: delete of a codon Also the first two element of the array stores the starting point in the protein and dna sequences in the local alignment. Display looks like where WIDTH is assumed to be divisible by 10. 0 . : . : . : . : . : . : CCTATGATACTGGGATACTGGAACGTCCGCGGACTGACACACCCGATCCGCATGCTCCTG P M I L G Y W N V R G L T H P I R M L L 60 . : . : . : . : . : . : GAATACACAGACTCAAGCTATGATGAGAAGAGATACACCATGGGTGACGCTCCCGACTTT E Y T D S S Y D E K R Y T M G D A P D F *//* fatal - print message and die */void fatal(msg)char *msg;{ fprintf(stderr, "%s\n", msg); exit(1);}voiddo_walign (const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int frame, struct pstruct *ppst, struct f_struct *f_str, struct a_res_str *a_res, int *have_ares){ int score; int i, last_n1, itemp, n10; int n_aa, n_nt, hoff, nt_min, nt_max, w_fact; unsigned char *fs, *fd; struct rstruct rst; int itx; a_res->res = f_str->res; /* pro_dna needs a valid res at start */ *have_ares = 1; /* pre_cons() required for TFAST */#ifndef TFAST /* FASTX */ n_aa = n1; n_nt = n0; /* check for large differences in sequence length */ nt_min = 0; nt_max = n_nt; if (n_nt > 6 * n_aa) { /* find out where the diagonal is - get hoff hoff < 0 => seq0 is in the middle of seq1 */ do_fastx(f_str->aa0x, n0, aa1, n1, ppst, f_str, &rst, &hoff); if (rst.score[0] > 2 * rst.score[2]) {w_fact = 4;} else w_fact = 2; if (hoff > n_aa) { /* hoff > 0 => seq1 is in the middle of seq0 */ nt_min = max(0,(hoff-w_fact*n_aa)*3); nt_max = min((hoff+w_fact*n_aa)*3,n_nt); } else { nt_max = min(3*w_fact*n_aa,n_nt); } } score = pro_dna(aa1, n1, f_str->aa0y+nt_min, nt_max-nt_min, ppst->pam2[0],#ifdef OLD_FASTA_GAP -(ppst->gdelval - ppst->ggapval),#else -ppst->gdelval,#endif -ppst->ggapval, -ppst->gshift, f_str->max_res, a_res); /* correct for nt_min missing residues in alignment */#else /* TFASTX */ /* for (i=0; i<n1; i++) { fputc(ppst->sq[f_str->aa1x[i]],stderr); if (i%60==59) fputc('\n',stderr); } fprintf(stderr,"\n-----\n"); */ last_n1 = 0; for (itx=3*frame; itx<3+3*frame; itx++) { n10 = saatran(aa1,&f_str->aa1x[last_n1],n1,itx);/* for (i=0; i<n10; i++) { fprintf(stderr,"%c",ppst->sq[aa10[last_n1+i]]); if ((i%60)==59) fprintf(stderr,"\n"); } fprintf(stderr,"\n");*/ last_n1 += n10+1; } n10 = last_n1-1; /* create aa1y from aa1x */ for (fs=f_str->aa1x,itemp=0; itemp <3; itemp++,fs++) { for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs; *fd=EOSEQ; } /* for (i=0; i<n1; i++) { fputc(ppst->sq[f_str->aa1y[i]],stderr); if (i%60==59) fputc('\n',stderr); } fprintf(stderr,"\n-----\n"); */ n_aa = n0; n_nt = n1; /* check for large differences in sequence length */ nt_min = 0; nt_max = n_nt; if (n_nt > 6 * n_aa) { /* find out where the diagonal is - get hoff hoff < 0 => seq0 is in the middle of seq1 */ do_fastx(aa0, n0, f_str->aa1x, n10, ppst, f_str, &rst, &hoff); if (rst.score[0] > 2 * rst.score[2]) {w_fact = 4;} else w_fact = 2; if ( hoff > n_aa) { /* hoff > 0 => seq1 is in the middle of seq0 */ nt_min = max(0,(hoff-w_fact*n_aa)*3); nt_max = min((hoff+w_fact*n_aa)*3,n_nt); } else { nt_max = min(3*w_fact*n_aa,n_nt); } } score = pro_dna(aa0, n0, f_str->aa1y+nt_min, nt_max-nt_min, ppst->pam2[0],#ifdef OLD_FASTA_GAP -(ppst->gdelval - ppst->ggapval),#else -ppst->gdelval,#endif -ppst->ggapval, -ppst->gshift, f_str->max_res, a_res);#endif /* TFASTX */ /* pro_dna always compares protein to DNA, and returns protein coordinates in a_res->min0,max0 */ a_res->min1 += nt_min; a_res->max1 += nt_min; /* display_alig(f_str->res,f_str->aa0y,aa1,*nres,n0); */ a_res->sw_score = score;}/* aln_func_vals - set up aln.qlfact, qlrev, llfact, llmult, frame, llrev *//* call from calcons, calc_id, calc_code */void aln_func_vals(int frame, struct a_struct *aln) {#ifndef TFAST aln->llrev = 0; aln->llfact = 1; aln->llmult = 1; aln->qlfact = 3; aln->frame = 0; if (frame > 0) aln->qlrev = 1; else aln->qlrev = 0;#else /* TFASTX */ aln->qlfact = 1; aln->qlrev = 0; aln->llfact = 3; aln->llmult = 1; aln->frame = 0; if (frame > 0) aln->llrev = 1; else aln->llrev = 0;#endif /* TFASTX */}/* this function is required for programs like tfastx/y/s that do translations on DNA sequences and save them in f_str->aa1??*/voidpre_cons(const unsigned char *aa1, int n1, int frame, struct f_struct *f_str) {#ifdef TFAST int i, last_n1, itemp, n10; unsigned char *fs, *fd; int itx; last_n1 = 0; for (itx=3*frame; itx<3+3*frame; itx++) { n10 = saatran(aa1,&f_str->aa1x[last_n1],n1,itx);/* for (i=0; i<n10; i++) { fprintf(stderr,"%c",ppst->sq[aa10[last_n1+i]]); if ((i%60)==59) fprintf(stderr,"\n"); } fprintf(stderr,"\n");*/ last_n1 += n10+1; } n10 = last_n1-1; /* create aa1y from aa1x */ for (fs=f_str->aa1x,itemp=0; itemp <3; itemp++,fs++) { for (fd= &f_str->aa1y[itemp]; *fs!=EOSEQ; fd += 3, fs++) *fd = *fs; *fd=EOSEQ; }#endif}/* Alignment: store the operation that align the protein and dna sequence. The code of the number in the array is as follows: 0: delete of an amino acid. 2: frame shift, 2 nucleotides match with an amino acid 3: match an amino acid with a codon 4: the other type of frame shift 5: delete of a codon The first two elements of the array stores the starting point in the protein and dna sequences in the local alignment.*/#include "a_mark.h"intcalcons_a(const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, int *nc, struct a_struct *aln, struct a_res_str *a_res, struct pstruct *ppst, char *seqc0, char *seqc1, char *seqca, unsigned char *aa0a, char *seqc0a, char *ann_arr, struct f_struct *f_str){ int i0, i1, i, j; int lenc, not_c, itmp, ngap_p, ngap_d, nfs; char *sp0, *sp0a, *sp1, *spa, *sq; const unsigned char *ap0, *ap1; int *rp, *rpmax; int have_ann = 0; if (ppst->ext_sq_set) {sq = ppst->sqx;} else {sq = ppst->sq;}#ifndef TFAST /* FASTX */ aln->amin1 = aln->smin1 = a_res->min0; /* prot */ aln->amin0 = aln->smin0 = a_res->min1; /* DNA */ ap0 = f_str->aa0y; /* translated DNA */ ap1 = aa1; /* protein */#else /* TFASTX */ have_ann = (ann_arr[0]!='\0' && aa0a != NULL); aln->amin0 = aln->smin0 = a_res->min0; /* DNA */ aln->amin1 = aln->smin1 = a_res->min1; /* prot */ ap1 = aa0; ap0 = f_str->aa1y;#endif rp = a_res->res; rpmax = &a_res->res[a_res->nres];#ifndef TFAST sp0 = seqc0; sp1 = seqc1;#else sp1 = seqc0; sp0 = seqc1;#endif spa = seqca; sp0a = seqc0a; lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs= 0; i0 = a_res->min1; i1 = a_res->min0; while (rp < rpmax) { /* fprintf(stderr,"%d %d %d (%c) %d (%c)\n" ,(int)(rp-res),*rp,i0,sq[ap0[i0]],i1,sq[ap1[i1]]); */ switch (*rp++) { case 0: /* aa insertion */ *sp0++ = '-'; *sp1++ = sq[ap1[i1++]]; *spa++ = M_DEL; *sp0a++ = ' '; lenc++; ngap_d++; break; case 2: /* -1 frameshift */ nfs++; *sp0++ = '/'; i0 -= 1; *sp1++ = '-'; *spa++ = M_DEL; *sp0a++ = ' '; not_c++; if ((itmp=ppst->pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; } else if (itmp == 0) { *spa = M_ZERO;} else {*spa = M_POS;} if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;} if (have_ann) {*sp0a++ = ann_arr[aa0a[i1]];} else {*sp0a++ = ' ';} *sp0 = sq[ap0[i0]]; i0 += 3; *sp1 = sq[ap1[i1++]]; if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;} sp0++; sp1++; spa++; lenc++; break; case 3: /* codon/aa match */ if ((itmp=ppst->pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; } else if (itmp == 0) { *spa = M_ZERO;} else {*spa = M_POS;} if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;} if (have_ann) {*sp0a++ = ann_arr[aa0a[i1]];} else {*sp0a++ = ' ';} *sp0 = sq[ap0[i0]]; i0 += 3; *sp1 = sq[ap1[i1++]]; if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;} sp0++; sp1++; spa++; lenc++; break; case 4: /* +1 frameshift */ nfs++; *sp0a++ = ' '; *sp0++ = '\\'; i0 += 1; *sp1++ = '-'; *spa++ = M_DEL; not_c++; if ((itmp=ppst->pam2[0][ap0[i0]][ap1[i1]])<0) { *spa = M_NEG; } else if (itmp == 0) { *spa = M_ZERO;} else {*spa = M_POS;} if (*spa == M_POS || *spa == M_ZERO) { aln->nsim++;} *sp0 = sq[ap0[i0]]; i0 += 3; *sp1 = sq[ap1[i1++]]; if (toupper(*sp0) == toupper(*sp1)) {aln->nident++; *spa = M_IDENT;} sp0++; sp1++; spa++; lenc++; break; case 5: /* codon insertion */ *sp0a++ = ' '; *sp0++ = sq[ap0[i0]]; i0 += 3; *sp1++ = '-'; *spa++ = M_DEL; lenc++; ngap_p++; break; } } *sp0a = *spa = '\0';#ifndef TFAST aln->amax0 = i0; aln->amax1 = i1; aln->ngap_q = ngap_d; aln->ngap_l = ngap_p;#else aln->amax1 = i0; aln->amax0 = i1; aln->ngap_q = ngap_p; aln->ngap_l = ngap_d;#endif aln->nfs = nfs; if (lenc < 0) lenc = 1; *nc = lenc;/* now we have the middle, get the right end */ return lenc+not_c;}/* build an array of match/ins/del - length strings */int calc_code(const unsigned char *aa0, int n0, const unsigned char *aa1, int n1, struct a_struct *aln, struct a_res_str *a_res, struct pstruct *ppst, char *al_str, int al_str_n, const unsigned char *aa0a, const char *ann_arr, char *ann_str, int ann_str_n, struct f_struct *f_str){ int i0, i1, i, j; int lenc, not_c, itmp, ngap_p, ngap_d, nfs; char op_char[10]; int op, op_cnt; char sp0, sp1, *sq; const unsigned char *ap0, *ap1; int *rp, *rpmax; int have_ann = 0; char tmp_astr[MAX_STR]; int sim_code; char *sim_sym= aln_map_sym[MX_ACC]; if (ppst->ext_sq_set) {sq = ppst->sqx;} else {sq = ppst->sq;}#ifndef TFAST /* FASTX */ strncpy(op_char,"- /=\\+*",sizeof(op_char)); aln->amin1 = aln->smin1 = a_res->min0; /* prot */ aln->amin0 = aln->smin0 = a_res->min1; /* DNA */ ap0 = f_str->aa0y; ap1 = aa1;#else /* TFASTX */ have_ann = (ann_arr[0] != '\0' && aa0a != NULL); strncpy(op_char,"+ /=\\-*",sizeof(op_char)); aln->amin0 = aln->smin0 = a_res->min0; /* DNA */ aln->amin1 = aln->smin1 = a_res->min1; /* prot */ ap1 = aa0; ap0 = f_str->aa1y;#endif rp = a_res->res; rpmax = &a_res->res[a_res->nres]; op_cnt = lenc = not_c = aln->nident = aln->nsim = ngap_p = ngap_d = nfs = 0; op = 3; /* code for a match - all alignments start with a match */ i0 = a_res->min1; i1 = a_res->min0; while (rp < rpmax) { switch (*rp++) { case 0: /* aa insertion */ if (op == 0) op_cnt++; else { update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char); op = 0; op_cnt = 1; } i1++; lenc++; ngap_d++; break; case 2: /* -1 frameshift */ if (ppst->pam2[0][ap0[i0]][ap1[i1]]>=0) { aln->nsim++;} update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char); op = 2; op_cnt = 1; update_code(al_str, al_str_n-strlen(al_str),op, op_cnt,op_char); op = 3; op_cnt = 1; nfs++; i0 -= 1; not_c++; sp0 = sq[ap0[i0]]; i0 += 3; sp1 = sq[ap1[i1++]]; if (toupper(sp0) == toupper(sp1)) aln->nident++; lenc++; break; case 3: /* codon/aa match */ sim_code = M_NEG; if (ppst->pam2[0][ap0[i0]][ap1[i1]]>=0) { sim_code = M_POS; aln->nsim++; } sp0 = sq[ap0[i0]]; sp1 = sq[ap1[i1]]; if (toupper(sp0) == toupper(sp1)) { aln->nident++; sim_code = M_IDENT; } /* check for an annotation */ if (have_ann && ann_arr[aa0a[i0]] != ' ') { sprintf(tmp_astr, "|%d:%d:%c%c%c%c", aln->q_offset+i1+1,aln->l_offset+i0+1,ann_arr[aa0a[i0]],sim_sym[sim_code],sp0,sp1); strncat(ann_str, tmp_astr, ann_str_n - strlen(ann_str) - 1)
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -