📄 dropfz2.c
字号:
} } } /* printf("The best score is %d\n", best); */ return best+gg;}/* ckalloc - allocate space; check for success */void *ckalloc(size_t amount){ void *p; if ((p = (void *)malloc( (size_t)amount)) == NULL) w_abort("Ran out of memory.",""); return(p);}/* calculate the 100% identical score */intshscore(unsigned char *aa0, int n0, int **pam2){ int i, sum; for (i=0,sum=0; i<n0; i++) sum += pam2[aa0[i]][aa0[i]]; return sum;}#define SGW1 100#define SGW2 300#define WIDTH 60typedef struct mat *match_ptr;typedef struct mat { int i, j, l; match_ptr next;} match_node;typedef struct { int i,j;} state;typedef state *state_ptr;void *ckalloc();static match_ptr small_global(), global();static int local_align(), find_best();static void init_row2(), init_ROW();intpro_dna(const unsigned char *prot_seq, /* array with prot. seq. numbers*/ int len_prot, /* length of prot. seq */ const unsigned char *dna_prot_seq, /* trans. DNA seq. numbers*/ int len_dna_prot, /* length trans. seq. */ int **pam_matrix, /* scoring matrix */ int gopen, int gext, /* gap open, gap extend penalties */ int gshift, /* frame-shift penalty */ struct f_struct *f_str, int max_res, struct a_res_str *a_res) /* alignment info */{ match_ptr align, ap, aq; int x, y, ex, ey, i, score; int *alignment; f_str->up = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10)); f_str->down = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10)); f_str->tp = (st_ptr) ckalloc(sizeof(struct st_s)*(len_dna_prot+10)); /*local alignment find the best local alignment x and y is the starting position of the best local alignment and ex ey is the ending position */ score= local_align(&x, &y, &ex, &ey, pam_matrix, gopen, gext, dna_prot_seq, len_dna_prot, prot_seq, len_prot, f_str); f_str->up += 3; f_str->down += 3; f_str->tp += 3; /* x, y - start in prot, dna_prot */ a_res->min0 = x; /* prot */ a_res->min1 = y; /* DNA */ a_res->max0 = ex; /* prot */ a_res->max1 = ey; /* DNA */ align = global(x, y, ex, ey, pam_matrix, gopen, gext, dna_prot_seq, prot_seq, 0, 0, f_str); alignment = a_res->res; for (ap = align, i= 0; ap; i++) { if (i < max_res) alignment[i] = ap->l; aq = ap->next; free(ap); ap = aq; } if (i >= max_res) fprintf(stderr,"***alignment truncated: %d/%d***\n", max_res,i); /* up = &up[-3]; down = &down[-3]; tp = &tp[-3]; */ free(&f_str->up[-3]); free(&f_str->tp[-3]); free(&f_str->down[-3]); a_res->nres = i; return score;}static voidswap(void **a, void **b){ void *t = *a; *a = *b; *b = t;}/* local alignment find the best local alignment x and y is the starting position of the best local alignment and ex ey is the ending position */static intlocal_align(int *x, int *y, int *ex, int *ey, int **wgts, int gop, int gext, const unsigned char *dnap, int ld, const unsigned char *pro, int lp, struct f_struct *f_str){ int i, j, score, x1,x2,x3,x4, e1 = 0, e2 = 0, e3, sc, del, e, best = 0, cd, ci, c; struct wgt *wt, *ww; state_ptr cur_st, last_st, cur_i_st; st_ptr cur, last; const unsigned char *dp; int *cur_d_st, *st_up; /* Array rowiC stores the best scores of alignment ending at a position Arrays rowiD and rowiI store the best scores of alignment ending at a position with a deletion or insrtion Arrays sti stores the starting position of the best alignment whose score stored in the corresponding row array. The program stores two rows to complete the computation, same is for the global alignment routine. */ st_up = (int *) ckalloc(sizeof(int)*(ld+10)); init_row2(st_up, ld+5); ld += 2; init_ROW(f_str->up, ld+1); init_ROW(f_str->down, ld+1); cur = f_str->up+1; last = f_str->down+1; cur_st = (state_ptr) ckalloc(sizeof(state)*(ld+1)); last_st = (state_ptr) ckalloc(sizeof(state)*(ld+1)); cur_i_st = (state_ptr) ckalloc(sizeof(state)*(ld+1)); cur_d_st = st_up; dp = dnap-2; for (i = 0; i < lp; i++) { wt = f_str->weight1[pro[i]]; e2 =0; e1 = last[0].C; for (j = 0; j < 2; j++) { cur_st[j].i = i+1; cur_st[j].j = j+1; } for (j = 2; j < ld; j++) { ww = &wt[(unsigned char) dp[j]]; del = -1; if (j >= 3) { sc = 0; e3 = e2; e2 = e1; e1 = last[j-2].C; if ((e=e2+ww->iii) > sc) {sc = e; del = 3;} if ((e=e1+ww->ii) > sc) {sc = e; del = 2;} if ((e = e3+ww->iv) > sc) {sc = e; del = 4;} } else { sc = e2 = 0; if (ww->iii > 0) {sc = ww->iii; del = 3;} } if (sc < (ci=last[j].I)) { sc = ci; del = 0; } if (sc < (cd=cur[j].D)) { sc = cd; del = 5; } cur[j].C = sc; e = sc - gop; if (e > cd) { cur[j+3].D = e-gext; cur_d_st[j+3] = 3; } else { cur[j+3].D = cd-gext; cur_d_st[j+3] = cur_d_st[j]+3; } switch(del) { case 5: c = cur_d_st[j]; cur_st[j].i = cur_st[j-c].i; cur_st[j].j = cur_st[j-c].j; break; case 0: cur_st[j].i = cur_i_st[j].i; cur_st[j].j = cur_i_st[j].j; break; case 2: case 3: case 4: if (i) { if (j-del >= 0) { cur_st[j].i = last_st[j-del].i; cur_st[j].j = last_st[j-del].j; } else { cur_st[j].i = i; cur_st[j].j = 0; } } else { cur_st[j].i = 0; cur_st[j].j = max(0, j-del+1); } break; case -1: cur_st[j].i = i+1; cur_st[j].j = j+1; break; } if (e > ci) { cur[j].I = e -gext; cur_i_st[j].i = cur_st[j].i; cur_i_st[j].j = cur_st[j].j; } else { cur[j].I = ci- gext; } if (sc > best) { x1 = cur_st[j].i; x2 = cur_st[j].j; best =sc; x3 = i; x4 = j; } } swap((void *)&last, (void *)&cur); swap((void *)&cur_st, (void *)&last_st); } /* printf("The best score is %d\n", best);*/ *x = x1; *y = x2; *ex = x3; *ey = x4; free(cur_st); free(last_st); free(cur_i_st); free(st_up); return best;}/* Both global_up and global_down do linear space score only global alignments on subsequence pro[x]...pro[ex], and dna[y]...dna[ey]. global_up do the algorithm upwards, from row x towards row y. global_down do the algorithm downwards, from row y towards x.*/static voidglobal_up(st_ptr *row1, st_ptr *row2, int x, int y, int ex, int ey, int **wgts, int gop, int gext, unsigned char *dnap, unsigned char *pro, int N, struct f_struct *f_str){ int i, j, k, sc, e, e1, e2, e3, t, ci, cd, score; struct wgt *wt, *ww; st_ptr cur, last; cur = *row1; last = *row2; sc = -gop; for (j = 0; j <= ey-y+1; j++) { if (j % 3 == 0) {last[j].C = sc; sc -= gext; last[j].I = sc-gop;} else { last[j].I = last[j].C = -10000;} } last[0].C = 0; cur[0].D = cur[1].D = cur[2].D = -10000; last[0].D = last[1].D = last[2].D = -10000; if (N) last[0].I = -gext; for (i = 1; i <= ex-x+1; i++) { wt = f_str->weight1[pro[i+x-1]]; e1 = -10000; e2 = last[0].C; for (j = 0; j <= ey-y+1; j++) { t = j+y; sc = -10000; ww = &wt[(unsigned char) dnap[t-3]]; if (j < 4) { if (j == 3) { sc = e2+ww->iii; } else if (j == 2) { sc = e2 + ww->ii; } } else { e3 = e2; e2 = e1; e1 = last[j-2].C; sc = max(e2+ww->iii, max(e1+ww->ii, e3+ww->iv)); } sc = max(sc, max(ci=last[j].I, cd = cur[j].D)); cur[j].C = sc; cur[j+3].D = max(cd, sc-gop)-gext; cur[j].I = max(ci, sc-gop)-gext; } swap((void *)&last, (void *)&cur); } /*printf("global up score =%d\n", last[ey-y+1].C);*/ for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I; if (*row1 != last) swap((void *)row1, (void *)row2);}static voidglobal_down(st_ptr *row1, st_ptr *row2, int x, int y, int ex, int ey, int **wgts, int gop, int gext, unsigned char *dnap, unsigned char *pro, int N, struct f_struct *f_str){ int i, j, k, sc, del, *tmp, e, t, e1,e2,e3, ci,cd, score; struct wgt *wt, *w1, *w2, *w3; st_ptr cur, last; cur = (*row1); last = *row2; sc = -gop; for (j = ey-y+1; j >= 0; j--) { if ((ey-y+1-j) % 3) {last[j].C = sc; sc-=gext; last[j].I = sc-gop;} else last[j].I = last[j].C = -10000; cur[j].I = -10000; } last[ey-y+1].C = 0; if (N) last[ey-y+1].I = -gext; cur[ey-y+1].D = cur[ey-y].D = cur[ey-y-1].D = -10000; last[ey-y+1].D = last[ey-y].D = last[ey-y-1].D = -10000; for (i = ex-x; i >= 0; i--) { wt = f_str->weight1[pro[i+x]]; e2 = last[ey-y+1].C; e1 = -10000; w3 = &wt[(unsigned char) dnap[ey]]; w2 = &wt[(unsigned char) dnap[ey-1]]; for (j = ey-y+1; j >= 0; j--) { t = j+y; w1 = &wt[(unsigned char) dnap[t-1]]; sc = -10000; if (t+3 > ey) { if (t+2 == ey) { sc = e2+w2->iii; } else if (t+1 == ey) { sc = e2+w1->ii; } } else { e3 = e2; e2 = e1; e1 = last[j+2].C; sc = max(e2+w2->iii, max(e1+w1->ii,e3+w3->iv)) ; } if (sc < (cd= cur[j].D)) { sc = cd; cur[j-3].D = cd-gext; } else cur[j-3].D =max(cd, sc-gop)-gext; if (sc < (ci= last[j].I)) { sc = ci; cur[j].I = ci - gext; } else cur[j].I = max(sc-gop,ci)-gext; cur[j].C = sc; w3 = w2; w2 = w1; } swap((void *)&last, (void *)&cur); } for (i = 0; i <= ey-y+1; i++) last[i].I = cur[i].I; if (*row1 != last) swap((void *)row1, (void *)row2);}static voidinit_row2(int *row, int ld) { int i; for (i = 0; i < ld; i++) row[i] = 0;}static void init_ROW(st_ptr row, int ld) { int i; for (i = 0; i < ld; i++) row[i].I = row[i].D = row[i].C = 0;}static match_ptrcombine(match_ptr x1, match_ptr x2, int st) { match_ptr x; if (x1 == NULL) return x2; for (x = x1; x->next; x = x->next); x->next = x2; if (st) { for (x = x2; x; x = x->next) { x->j++; if (x->l == 3 || x->l == 4) break; } x->l--; } return x1;}/* global use the two upwards and downwards score only linear space global alignment subroutine to recursively build the alignment.*/match_ptrglobal(int x, int y, int ex, int ey, int **wgts, int gop, int gext, unsigned char *dnap, unsigned char *pro, int N1, int N2, struct f_struct *f_str){ int m; int m1, m2; match_ptr x1, x2, mm1, mm2; /*printf("%d %d %d %d %d %d\n", x,y, ex, ey, N1, N2);*/ /* if the space required is limited, we can do a quadratic space algorithm to find the alignment. */ if (ex <= x) { mm1 = NULL; for (m = y+3; m <= ey; m+=3) { x1 = (match_ptr) ckalloc(sizeof(match_node)); x1->l = 5; x1->next = mm1; if (mm1== NULL) mm2 = x1; mm1 = x1; } if (ex == x) { if ((ey-y) % 3 != 0) { x1 = (match_ptr) ckalloc(sizeof(match_node)); x1->l = ((ey-y) % 3) +1; x1->next = NULL; if (mm1) mm2->next = x1; else mm1 = x1; } else mm2->l = 4; } return mm1; } if (ey <= y) { mm1 = NULL; for (m = x; m <= ex; m++) { x1 = (match_ptr) ckalloc(sizeof(match_node)); x1->l = 0; x1->next = mm1; mm1 = x1; } return mm1; } if (ex -x < SGW1 && ey-y < SGW2) return small_global(x,y,ex,ey,wgts, gop, gext, dnap, pro, N1, N2,f_str); m = (x+ex)/2; /* Do the score only global alignment from row x to row m, m is the middle row of x and ex. Store the information of row m in upC, upD, and upI. */ global_up(&f_str->up, &f_str->tp, x, y, m, ey, wgts, gop, gext, dnap, pro, N1, f_str); /* Do the score only global alignment downwards from row ex to row m+1, store information of row m+1 in downC downI and downD */ global_down(&f_str->down, &f_str->tp, m+1, y, ex, ey, wgts, gop, gext, dnap, pro, N2, f_str); /* Use this information for row m and m+1 to find the crossing point of the best alignment with the middle row. The crossing point is given by m1 and m2. Then we recursively call global itself to compute alignments in two smaller regions found by the crossing point and combine the two alignments to form a whole alignment. Return that alignment. */ if (find_best(f_str->up, f_str->down, &m1, &m2, ey-y+1, y, gop)) { x1 = global(x, y, m, m1, wgts, gop, gext, dnap, pro, N1, 0, f_str); x2 = global(m+1, m2, ex, ey, wgts, gop, gext, dnap, pro, 0, N2, f_str); if (m1 == m2) x1 = combine(x1,x2,1); else x1 = combine(x1, x2,0); } else { x1 = global(x, y, m-1, m1, wgts, gop, gext, dnap, pro, N1, 1, f_str); x2 = global(m+2, m2, ex, ey, wgts, gop, gext, dnap, pro, 1, N2, f_str); mm1 = (match_ptr) ckalloc(sizeof(match_node)); mm1->i = m; mm1->l = 0; mm1->j = m1; mm2 = (match_ptr) ckalloc(sizeof(match_node)); mm2->i = m+1; mm2->l = 0; mm2->j = m1; mm1->next = mm2; mm2->next = x2; x1 = combine(x1, mm1, 0); } return x1;}static intfind_best(st_ptr up, st_ptr down, int *m1, int *m2, int ld, int y, int gop) { int i, best = -1000, j = 0, s1, s2, s3, s4, st; for (i = 1; i < ld; i++) { s2 = up[i].C + down[i].C; s4 = up[i].I + down[i].I + gop; if (best < s2) { best = s2; j = i; st = 1; } if (best < s4) { best = s4; j = i; st = 0;
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -