📄 bz_main.c
字号:
#define VERSION 6#include "bz_all.h"#include <signal.h>enum{ DEFAULT_E = 30, DEFAULT_O = 400, DEFAULT_W = 8, DEFAULT_R = 0, DEFAULT_G = 0};static const char Usage[] ="\n%s command options (e.g., \"blastz seq1 seq2 B=0 C=2\"):\n\Default values are given in parentheses.\n\ m(80M) bytes of space for trace-back information\n\ v(0) 0: quiet; 1: verbose progress reports to stderr\n\ B(2) 0: single strand; >0: both strands\n\ C(0) 0: no chaining; 1: just output chain; 2: chain and extend;\n\ 3: just output HSPs\n\ E(30) gap-extension penalty.\n\ G(0) diagonal chaining penalty.\n\ H(0) interpolate between alignments at threshold K = argument.\n\ K(3000) threshold for MSPs\n\ L(K) threshold for gapped alignments\n\ M(50) mask threshold for seq1, if a bp is hit this many times\n\ O(400) gap-open penalty.\n\ P(1) 0: entropy not used; 1: entropy used; >1 entropy with feedback.\n\ Q load the scoring matrix from a file.\n\ R(0) antidiagonal chaining penalty.\n\ T(1) 0: W-bp words; 1: 12of19; 2: 12of19 without transitions.\n\ W(8) word size.\n\ X(10*(A-to-A match score)) X-drop parameter for ungapped extension.\n\ Y(O+300E) X-drop parameter for gapped extension.\n";static bz_flags_t bz_flags;static int Connect(msp_t *q, msp_t *p, int scale);align_t *inner(align_t *a, SEQ *sf1, SEQ *sf2, gap_scores_t gs, ss_t ss, ss_t sss, int Y, int innerK, TBack tback, bz_flags_t bz_flags, connect_t Connect, int MSP_Scale);static int verbose;// static ss_t ss;// static ss_t sss;static int ss[NACHARS][NACHARS];static int sss[NACHARS][NACHARS];enum { MSP_Scale = 100 };static void mkmask_ss(ss_t ss, ss_t sss){ int i, j, bad = ss['A']['X']; for (i = 0; i < NACHARS; ++i) for (j = 0; j < NACHARS; ++j) sss[i][j] = ((isupper(i) && isupper(j)) ? ss[i][j] : bad);}/* connect - compute penalty for connecting two fragements */static int Connect(msp_t *q, msp_t *p, int scale){ int q_xend, q_yend, diag_diff, substitutions; if (p->pos1 <= q->pos1 || p->pos2 <= q->pos2) fatal("HSPs improperly ordered for chaining."); q_xend = q->pos1 + q->len - 1; q_yend = q->pos2 + q->len - 1; diag_diff = (p->pos1 - p->pos2) - (q->pos1 - q->pos2); if (diag_diff >= 0) substitutions = p->pos2 - q_yend - 1; else { substitutions = p->pos1 - q_xend - 1; diag_diff = -diag_diff; } return diag_diff*bz_flags.G + (substitutions >= 0 ? substitutions*bz_flags.R : -substitutions*scale*ss[(uchar)'A'][(uchar)'A']);}/* strand1 -- process one sequence from the second file in one orientation */static void strand1(SEQ *sf1, uchar *rf1, SEQ *sf2, blast_table_t *bt, gap_scores_t gs, ss_t ss, ss_t sss, bz_flags_t *bf, TBack tback, census_t census[]){ static msp_table_t *mt = 0; int C = bf->C; int T = bf->T; int X = bf->X; int Y = bf->Y; int K = bf->K; int L = bf->L; int P = bf->P; int innerK = bf->H; if (mt == 0) mt = msp_new_table(); // XXX - not reentrant mt->num = 0; mt = (T?blast_1219_search:blast_search)(sf1, sf2, bt, mt, sss, X, K, P, T==1); if (C == 1 || C == 2) { msp_t *p; int f = 0; (void)msp_make_chain(mt, bz_flags.G, bz_flags.G, MSP_Scale, Connect); for (p = MSP_TAB_FIRST(mt); MSP_TAB_MORE(mt,p); p = MSP_TAB_NEXT(p)) { p->filter = 1 - p->cum_score; f |= p->filter; } msp_compress(mt); msp_sort_pos1(mt); } if (C == 1 || C > 2) { print_align_header(sf1, sf2, ss, &gs, K, L); print_msp_list(sf1, sf2, mt); } else if (mt != 0 && MSP_TAB_NUM(mt) != 0) { align_t *a; a = bz_extend_msps(sf1, rf1, sf2, mt, &gs, ss, Y, L, tback); /* next two lines added Aug. 16, 2002 */ if (a && innerK) a = inner(a, sf1, sf2, gs, ss, sss, Y, innerK, tback, bz_flags, Connect, MSP_Scale); if (a) { int n; print_align_header(sf1, sf2, ss, &gs, K, L); print_align_list(a); n = census_mask_align(a, SEQ_LEN(sf1), SEQ_CHARS(sf1), rf1, census, bf->M); printf("x {\n n %d\n}\n", n); } free_align_list(a); } //msp_free_table(mt); // XXX}static void bye(int sig){ exit(sig);}void reverse_inplace(uchar *s, int len){ uchar *p = s + len - 1; while (s <= p) { register uchar t = *p; *p-- = *s; *s++ = t; }}int main(int argc, char *argv[]){ SEQ *sf1, *sf2; uchar *rf1; blast_table_t *bt; gap_scores_t gs; int flag_strand, flag_size, flag_census, flag_reverse; char *scorefile, cmd[100]; TBack tback; census_t *census; (void) sprintf(cmd, "blastz.v%d", VERSION); argv0 = cmd; // flush stdio buffers when killed signal(SIGHUP, bye); signal(SIGINT, bye); signal(SIGTERM, bye); if (argc < 3) { fprintf(stderr, Usage, argv0); exit(01); } ckargs("BCEFGHKLMOPQRTWYbcmrv", argc, argv, 2); if (get_cargval('Q', &scorefile)) scores_from_file(scorefile, ss); else DNA_scores(ss); mkmask_ss(ss, sss); get_argval_pos('m', &flag_size, 80*1024*1024); get_argval_pos('b', &flag_reverse, 0); get_argval_pos('c', &flag_census, 0); get_argval_pos('v', &verbose, 0); get_argval_nonneg('B', &flag_strand, 2); get_argval_nonneg('C', &(bz_flags.C), 0); get_argval_nonneg('E', &(gs.E), DEFAULT_E); get_argval_nonneg('G', &(bz_flags.G), DEFAULT_G); get_argval_nonneg('H', &(bz_flags.H), 0); get_argval_pos('K', &(bz_flags.K), 3000); get_argval_pos('L', &(bz_flags.L), bz_flags.K); get_argval_nonneg('M', &(bz_flags.M), 50); get_argval_nonneg('O', &(gs.O), DEFAULT_O); get_argval_nonneg('P', &(bz_flags.P), 1); get_argval_nonneg('R', &(bz_flags.R), DEFAULT_R); get_argval_nonneg('T', &(bz_flags.T), 1); get_argval_pos('W', &(bz_flags.W), DEFAULT_W); get_argval_pos('X', &(bz_flags.X), 10*ss[(uchar)'A'][(uchar)'A']); get_argval_pos('Y', &(bz_flags.Y), (int)(gs.O+300*gs.E)); if (bz_flags.T) bz_flags.W = 12; sf1 = seq_open(argv[1]); if (!seq_read(sf1)) fatalf("Cannot read sequence from %s.", argv[1]); if (!is_DNA(SEQ_CHARS(sf1), SEQ_LEN(sf1))) fatal("The first sequence is not a DNA sequence."); if (flag_reverse & 01) reverse_inplace(SEQ_CHARS(sf1), SEQ_LEN(sf1)); rf1 = reverse_seq(SEQ_CHARS(sf1), SEQ_LEN(sf1)); sf2 = seq_open(argv[2]); if (!seq_read(sf2)) fatalf("Cannot read sequence from %s.", argv[2]); if (!is_DNA(SEQ_CHARS(sf2), SEQ_LEN(sf2))) fatal("The second sequence is not a DNA sequence."); bt = (bz_flags.T ? blast_1219_new : blast_table_new)(sf1, bz_flags.W); if (bz_flags.C == 0 || bz_flags.C == 2) { tback = ckalloc(sizeof(TB)); tback->space = ckalloc(flag_size*sizeof(uchar)); tback->len = flag_size; } else tback = NULL; census = new_census(SEQ_LEN(sf1)); print_job_header(ss, &gs, bz_flags.K, bz_flags.L, bz_flags.M); do { if (flag_reverse & 02) reverse_inplace(SEQ_CHARS(sf2), SEQ_LEN(sf2)); strand1(sf1, rf1, sf2, bt, gs, ss, sss, &bz_flags, tback, census); //strand1(sf1, rf1, sf2, bt, gs, ss, sss, X, Y, K, L, P, // flag_chain, tback, census, bz_flags.M, bz_flags.H); if (flag_strand > 0) { seq_revcomp_inplace(sf2); strand1(sf1, rf1, sf2, bt, gs, ss, sss, &bz_flags, tback, census); } fflush(stdout); } while (seq_read(sf2)); print_intervals(stdout, census, SEQ_LEN(sf1), bz_flags.M); if (flag_census) print_census(stdout, census, SEQ_LEN(sf1), 0); print_job_footer(); //print_statistics(); (bz_flags.T ? blast_1219_free : blast_table_free)(bt); seq_close(sf2); seq_close(sf1); if (tback) { ckfree(tback->space); ckfree(tback); } return 0;}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -