⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bz_main.c

📁 一个类似于blast算法的基因数据快速搜索算法
💻 C
字号:
#define VERSION 6#include "bz_all.h"#include <signal.h>enum{ DEFAULT_E =  30, DEFAULT_O = 400, DEFAULT_W =   8, DEFAULT_R =   0, DEFAULT_G =   0};static const char Usage[] ="\n%s command options (e.g., \"blastz seq1 seq2 B=0 C=2\"):\n\Default values are given in parentheses.\n\  m(80M) bytes of space for trace-back information\n\  v(0) 0: quiet; 1: verbose progress reports to stderr\n\  B(2) 0: single strand; >0: both strands\n\  C(0) 0: no chaining; 1: just output chain; 2: chain and extend;\n\       3: just output HSPs\n\  E(30) gap-extension penalty.\n\  G(0) diagonal chaining penalty.\n\  H(0) interpolate between alignments at threshold K = argument.\n\  K(3000) threshold for MSPs\n\  L(K) threshold for gapped alignments\n\  M(50) mask threshold for seq1, if a bp is hit this many times\n\  O(400) gap-open penalty.\n\  P(1) 0: entropy not used; 1: entropy used; >1 entropy with feedback.\n\  Q load the scoring matrix from a file.\n\  R(0) antidiagonal chaining penalty.\n\  T(1) 0: W-bp words;  1: 12of19;  2: 12of19 without transitions.\n\  W(8) word size.\n\  X(10*(A-to-A match score)) X-drop parameter for ungapped extension.\n\  Y(O+300E) X-drop parameter for gapped extension.\n";static bz_flags_t bz_flags;static int Connect(msp_t *q, msp_t *p, int scale);align_t *inner(align_t *a, SEQ *sf1, SEQ *sf2, gap_scores_t gs, ss_t ss,  ss_t sss, int Y, int innerK, TBack tback, bz_flags_t bz_flags,  connect_t Connect, int MSP_Scale);static int verbose;// static ss_t ss;// static ss_t sss;static int ss[NACHARS][NACHARS];static int sss[NACHARS][NACHARS];enum { MSP_Scale = 100 };static void mkmask_ss(ss_t ss, ss_t sss){	int i, j, bad = ss['A']['X'];	for (i = 0; i < NACHARS; ++i)		for (j = 0; j < NACHARS; ++j)		   sss[i][j] = ((isupper(i) && isupper(j)) ? ss[i][j] : bad);}/* connect - compute penalty for connecting two fragements */static int Connect(msp_t *q, msp_t *p, int scale){	int q_xend, q_yend, diag_diff, substitutions;	if (p->pos1 <= q->pos1 || p->pos2 <= q->pos2)		fatal("HSPs improperly ordered for chaining.");	q_xend = q->pos1 + q->len - 1;	q_yend = q->pos2 + q->len - 1;	diag_diff = (p->pos1 - p->pos2) - (q->pos1 - q->pos2);	if (diag_diff >= 0)		substitutions = p->pos2 - q_yend - 1;	else {		substitutions = p->pos1 - q_xend - 1;		diag_diff = -diag_diff;	}	return diag_diff*bz_flags.G + (substitutions >= 0 ?		substitutions*bz_flags.R :		-substitutions*scale*ss[(uchar)'A'][(uchar)'A']);}/* strand1 -- process one sequence from the second file in one orientation */static void strand1(SEQ *sf1, uchar *rf1, SEQ *sf2, blast_table_t *bt,    gap_scores_t gs, ss_t ss, ss_t sss, bz_flags_t *bf,    TBack tback, census_t census[]){	static msp_table_t *mt = 0;	int C = bf->C;	int T = bf->T;	int X = bf->X;	int Y = bf->Y;	int K = bf->K;	int L = bf->L;	int P = bf->P;	int innerK = bf->H;	if (mt == 0) mt = msp_new_table(); // XXX - not reentrant	mt->num = 0;	mt = (T?blast_1219_search:blast_search)(sf1, sf2, bt, mt, sss, X, K, P, T==1);	if (C == 1 || C == 2) {	    msp_t *p;	    int f = 0;	    	    (void)msp_make_chain(mt, bz_flags.G, bz_flags.G, MSP_Scale, Connect);	    for (p = MSP_TAB_FIRST(mt); MSP_TAB_MORE(mt,p); p = MSP_TAB_NEXT(p)) {		p->filter = 1 - p->cum_score;		f |= p->filter;	    }	    msp_compress(mt);	    msp_sort_pos1(mt);	}	if (C == 1 || C > 2) {		print_align_header(sf1, sf2, ss, &gs, K, L);	        print_msp_list(sf1, sf2, mt);	} else if (mt != 0 && MSP_TAB_NUM(mt) != 0) {		align_t *a;		a = bz_extend_msps(sf1, rf1, sf2, mt, &gs, ss, Y, L, tback);		/* next two lines added Aug. 16, 2002 */		if (a && innerK)		       a = inner(a, sf1, sf2, gs, ss, sss, Y, innerK, tback,			  bz_flags, Connect, MSP_Scale);		if (a) {			int n;			print_align_header(sf1, sf2, ss, &gs, K, L);			print_align_list(a);			n = census_mask_align(a, SEQ_LEN(sf1), SEQ_CHARS(sf1), rf1, census, bf->M);			printf("x {\n  n %d\n}\n", n);		}		free_align_list(a);	}	//msp_free_table(mt); // XXX}static void bye(int sig){	exit(sig);}void reverse_inplace(uchar *s, int len){        uchar *p = s + len - 1;        while (s <= p) {                register uchar t = *p;                *p-- = *s;                *s++ = t;	}}int main(int argc, char *argv[]){	SEQ *sf1, *sf2;	uchar *rf1;	blast_table_t *bt;	gap_scores_t gs;	int flag_strand, flag_size, flag_census, flag_reverse;	char *scorefile, cmd[100];	TBack tback;	census_t *census;	(void) sprintf(cmd, "blastz.v%d", VERSION);	argv0 = cmd;	// flush stdio buffers when killed	signal(SIGHUP, bye);	signal(SIGINT, bye);	signal(SIGTERM, bye);	if (argc < 3) {		fprintf(stderr, Usage, argv0);		exit(01);	}	ckargs("BCEFGHKLMOPQRTWYbcmrv", argc, argv, 2);	if (get_cargval('Q', &scorefile))		scores_from_file(scorefile, ss);	else		DNA_scores(ss);	mkmask_ss(ss, sss);	get_argval_pos('m', &flag_size, 80*1024*1024);	get_argval_pos('b', &flag_reverse, 0);	get_argval_pos('c', &flag_census, 0);	get_argval_pos('v', &verbose, 0);	get_argval_nonneg('B', &flag_strand, 2);	get_argval_nonneg('C', &(bz_flags.C), 0);	get_argval_nonneg('E', &(gs.E), DEFAULT_E);	get_argval_nonneg('G', &(bz_flags.G), DEFAULT_G);	get_argval_nonneg('H', &(bz_flags.H), 0);	get_argval_pos('K', &(bz_flags.K), 3000);	get_argval_pos('L', &(bz_flags.L), bz_flags.K);	get_argval_nonneg('M', &(bz_flags.M), 50);	get_argval_nonneg('O', &(gs.O), DEFAULT_O);	get_argval_nonneg('P', &(bz_flags.P), 1);	get_argval_nonneg('R', &(bz_flags.R), DEFAULT_R);	get_argval_nonneg('T', &(bz_flags.T), 1);	get_argval_pos('W', &(bz_flags.W), DEFAULT_W);	get_argval_pos('X', &(bz_flags.X), 10*ss[(uchar)'A'][(uchar)'A']);	get_argval_pos('Y', &(bz_flags.Y), (int)(gs.O+300*gs.E));	if (bz_flags.T) bz_flags.W = 12;        sf1 = seq_open(argv[1]);        if (!seq_read(sf1))		fatalf("Cannot read sequence from %s.", argv[1]);        if (!is_DNA(SEQ_CHARS(sf1), SEQ_LEN(sf1)))                fatal("The first sequence is not a DNA sequence.");	if (flag_reverse & 01)	    reverse_inplace(SEQ_CHARS(sf1), SEQ_LEN(sf1));	rf1 = reverse_seq(SEQ_CHARS(sf1), SEQ_LEN(sf1));		sf2 = seq_open(argv[2]);	if (!seq_read(sf2))		fatalf("Cannot read sequence from %s.", argv[2]);	if (!is_DNA(SEQ_CHARS(sf2), SEQ_LEN(sf2)))		fatal("The second sequence is not a DNA sequence.");	bt = (bz_flags.T ? blast_1219_new : blast_table_new)(sf1, bz_flags.W);	if (bz_flags.C == 0 || bz_flags.C == 2) {        	tback = ckalloc(sizeof(TB));        	tback->space = ckalloc(flag_size*sizeof(uchar));        	tback->len = flag_size;	} else		tback = NULL;	census = new_census(SEQ_LEN(sf1));	print_job_header(ss, &gs, bz_flags.K, bz_flags.L, bz_flags.M);	do {		if (flag_reverse & 02)		    reverse_inplace(SEQ_CHARS(sf2), SEQ_LEN(sf2));		strand1(sf1, rf1, sf2, bt, gs, ss, sss, &bz_flags, tback, census);		//strand1(sf1, rf1, sf2, bt, gs, ss, sss, X, Y, K, L, P,		//  flag_chain, tback, census, bz_flags.M, bz_flags.H);		if (flag_strand > 0) {			seq_revcomp_inplace(sf2);			strand1(sf1, rf1, sf2, bt, gs, ss, sss, &bz_flags, tback, census);		}		fflush(stdout);	} while (seq_read(sf2));	print_intervals(stdout, census, SEQ_LEN(sf1), bz_flags.M);	if (flag_census) print_census(stdout, census, SEQ_LEN(sf1), 0);	print_job_footer();	//print_statistics();        (bz_flags.T ? blast_1219_free : blast_table_free)(bt);        seq_close(sf2);	seq_close(sf1);	if (tback) {		ckfree(tback->space);		ckfree(tback);	}	return 0;}

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -