⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 bz_extend.c

📁 一个类似于blast算法的基因数据快速搜索算法
💻 C
字号:
static const char rcsid[]="$Id: bz_extend.c,v 1.9 2003/02/18 18:01:12 webb Exp $";/* The main duties of procedures in this file are:*	1. Given a set of MSPs, call Align() on each one.*	2. Maintain a data structure that accumulates the alignments (one*		per MSP).*/#include "util.h"#include "seq.h"#include "bz_all.h"#include "bz_main.h"/* ----------- procedures to initialize the extended MSP structures --------  *//* For each MSP we need to keep more information (see blastz.h).  Here we*  allocate a larger node for each MSP and populate the most basic information.*  The point (pos1,pos2) is set to the MSP's seed-point for a gapped alignment.*//* find_best_31 - shifts (pos1,pos2) to the center of the MSP's best 31-mer */static void find_best_31(Msp_ptr mp, uchar *seq1, uchar *seq2, int len, ss_t ss){	int i, sum, best, shift;	uchar *s, *q;	if (len > 31) {		q = seq1 + mp->pos1; 		s = seq2 + mp->pos2;		for (sum = i = 0; i < 31; ++i)			sum += ss[*s++][*q++];		shift = 15;		best = sum;		for (i = 31; i < len; i++) {			sum -= ss[*(s-31)][*(q-31)];			sum += ss[*s++][*q++];			if (sum > best) {				best = sum;				shift = i - 15;			}		}	} else		shift = len/2;	mp->pos1 += shift;	mp->pos2 += shift;}static Msp_ptr *augment_msp_table(msp_table_t *mt, SEQ *sf1, SEQ *sf2, ss_t ss){	int numMSPs, i;	Msp_ptr *msp, mp;	msp_t *mst;	numMSPs = MSP_TAB_NUM(mt);	msp = ckallocz(sizeof(Msp_ptr)*(numMSPs+1));		/* (the extra place may be used for a trivial self-alignment) */	msp[0] = ckallocz(sizeof(Msp_node)*(numMSPs+1));	for (i = 1; i <= numMSPs; i++) 		msp[i] = msp[i-1]+1;	for (i = 0; i < numMSPs; i++) {		mp = msp[i];		mst = &(MSP_TAB_NTH(mt,i));		mp->pos1 = mst->pos1;		mp->pos2 = mst->pos2;		find_best_31(mp, SEQ_CHARS(sf1), SEQ_CHARS(sf2), mst->len, ss);	}	return msp;}#define HUGE_INT 100000000/* ------------- procedures to process the two lists of alignments ---------- *//* msp_updown -- given MSP m, we search obi, the list of alignments ordered*  by increasing beginning point, to find the alignments and their contained*  blocks (gap-free pieces) that are the closest above and below the MSP's*  seed-point.  0 is returned if the seed-point is in an already-computed*  alignment.*/static int msp_updown(Msp_ptr obi, Msp_ptr m){	int up = HUGE_INT, down = HUGE_INT, a, p1 = m->pos1, p2 = m->pos2;	block_ptr bp, bup, bdown;	Msp_ptr mup, mdown;	bup = bdown = NULL;	mup = mdown = NULL;	for ( ; obi && obi->pos1 <= p1; obi = obi->next_msp) {		if (obi->end1 < p1)			continue;		for (bp = obi->first_block; bp; bp = bp->next_block) 			if (bp->e1 >= p1)				break;		if (bp == NULL)			continue;	/* CAN HAPPEN, BUT HOW? */		if (bp->type == HOR)			fatal("msp_updown: cannot be horizontal.");		if (bp->type == DIA) 			a = p2 - (bp->b2 + p1 - bp->b1);		else			a = p2 - bp->b2;		if (a == 0)			return 0;		else if (a > 0 && down > a) {			down = a;			bdown = bp;			mdown = obi;		} else if (a < 0 && up > -a) {			up = -a;			bup = bp;			mup = obi;		}	}	m->up_block1 = m->up_block2 = bup;	m->up_align1 =  m->up_align2 = mup;	m->down_block1 = m->down_block2 = bdown;	m->down_align1 = m->down_align2 = mdown;	return 1;}/* align_updown -- given a new alignment, determine the alignments and their*  blocks that are the closest in either direction at both ends of the*  alignment.*/static void align_updown(Msp_ptr obi, Msp_ptr mptr){	int left_up = HUGE_INT, right_up = HUGE_INT, left_down = HUGE_INT,	  right_down = HUGE_INT, a;	block_ptr bp, up_left, up_right, down_left, down_right;	Msp_ptr up_align_left, up_align_right, down_align_left, down_align_right;	up_align_left = down_align_left = up_align_right = down_align_right = NULL;	up_left = up_right = down_left = down_right = NULL;	for ( ; obi; obi = obi->next_msp) {		if (obi->pos1 <= mptr->end1 && obi->end1 >= mptr->pos1) {			for (bp = obi->first_block; bp; bp = bp->next_block) 				if (bp->type != HOR && bp->e1 >= mptr->pos1)					break;			if (bp && bp->b1 <= mptr->pos1) {				if (bp->type == DIA) 					a = mptr->pos2 -					  (bp->b2 + mptr->pos1 - bp->b1);				else					a = mptr->pos2 - bp->b2;				if (a > 0 && left_down > a) {					left_down = a;					down_left = bp;					down_align_left = obi;				} else if (a < 0 && left_up > -a) {					left_up = -a;					up_left = bp;					up_align_left = obi;				}			}/* DO WE NEED THESE CHECKS FOR BP->TYPE != HOR) ???? */			for ( ; bp; bp = bp->next_block) 				if (bp->type != HOR && bp->e1 >= mptr->end1)					break;			if (bp && bp->type != HOR &&  bp->e1 >= mptr->end1) {				if (bp->type == DIA) 					a = mptr->end2 -					  (bp->b2 + mptr->end1 - bp->b1);				else					a = mptr->end2 - bp->b2;				if (a > 0 && right_down > a) {					right_down = a;					down_right = bp;					down_align_right = obi;				} else if (a < 0 && right_up > -a) {					right_up = -a;					up_right = bp;					up_align_right = obi;				}			}		}	}	mptr->up_block1 = up_left;	mptr->up_align1 = up_align_left;	mptr->up_block2 = up_right;	mptr->up_align2 = up_align_right;	mptr->down_block1 = down_left;	mptr->down_align1 = down_align_left;	mptr->down_block2 = down_right;	mptr->down_align2 = down_align_right;}/* insert_align - insert a new alignment in the two lists of alignments,*  one ordered by increasing beginning point and the other ordered by*  decreasing end point.*/static void insert_align(Msp_ptr mptr, Msp_ptr *obi, Msp_ptr *oed){	Msp_ptr mp, mq;	Msp_ptr order_beg_inc=*obi, order_end_dec=*oed;	if (mptr->first_block == NULL)		fatal("insert_align: null first block.");	for (mq = NULL, mp = order_beg_inc; mp; mq = mp, mp = mp->next_msp)		if (mp->pos1 >= mptr->pos1)			break;	if (mq) {		mq->next_msp = mptr;		mptr->next_msp = mp;	} else {		mptr->next_msp = order_beg_inc;		order_beg_inc = mptr;	}	for (mq = NULL, mp = order_end_dec; mp; mq = mp, mp = mp->prev_msp)		if (mp->end1 <= mptr->end1)			break;	if (mq) {		mq->prev_msp = mptr;		mptr->prev_msp = mp;	} else {		mptr->prev_msp = order_end_dec;		order_end_dec = mptr;	}	*obi = order_beg_inc; *oed = order_end_dec;}/* get_closest - in preparation for extending the seedpoint of an MSP, find*  the closest alignment ending before the seedpoint and the closest*  alignment starting after the seedpoint.*/static void get_closest(int b1, AlignIOPtr align_IO, Msp_ptr order_beg_inc,  Msp_ptr order_end_dec){	Msp_ptr mp;	for (mp = order_end_dec; mp; mp = mp->prev_msp)		if (mp->end1 < b1)			break;	align_IO->Left_list = mp;	for (mp = order_beg_inc; mp; mp = mp->next_msp)		if (mp->pos1 > b1)			break;	align_IO->Right_list = mp;}/* ------------------ procedures to process a new alignment  ---------------- *//* save the internal form of a gap-free block */static void insert_to_tail(Msp_ptr mp, block_ptr bp){	bp->prev_block = mp->first_block->prev_block;	bp->next_block = mp->first_block;	mp->first_block->prev_block->next_block = bp;	mp->first_block->prev_block = bp;}/* save_block - add a gap-free piece of alignment at the end of the list kept*  with the seeding MSP.  If appropriate, insert a horizontal or vertical*  piece before it.*/static void save_block(int b1, int b2, int e1, int e2, Msp_ptr mptr){	block_ptr bq, bp = (block_ptr) ckalloc(sizeof(block));	bp->b1 = b1; bp->b2 = b2; bp->e1= e1; bp->e2 = e2;	bp->type = DIA;	if (mptr->first_block) {		bq = (block_ptr) ckalloc(sizeof(block));		bq->type =		  ((b1 == mptr->first_block->prev_block->e1 + 1) ? HOR : VER);		bq->b1 = mptr->first_block->prev_block->e1 + 1;		bq->b2 = mptr->first_block->prev_block->e2 + 1;		bq->e1 = b1 - 1; bq->e2 = b2 - 1;		insert_to_tail(mptr, bq);		insert_to_tail(mptr, bp);	} else		mptr->first_block = bp->prev_block = bp->next_block = bp;}/* format_alignment - process the edit script for a newly computed alignment*  by (1) storing a linked-list form with the seeding MSP and (2) augmenting*  the edit script into a form suitable for returning to the calling program.*/static align_t *format_alignment(AlignIOPtr align_IO, Msp_ptr mptr) {        int beg1, end1, beg2, end2, M, N, i, j, opn, not_used, start_i,	  start_j, run;	edit_script_t *S;	uchar *seq1, *seq2;	align_t *a;	beg1 = align_IO->start1 + 1;  	end1 = align_IO->stop1 + 1;	beg2 = align_IO->start2 + 1;	end2 = align_IO->stop2 + 1;	S = align_IO->ed_scr;	seq1 = align_IO->seq1;	seq2 = align_IO->seq2;	M = end1 - beg1 + 1;	N = end2 - beg2 + 1;	for (opn = i = j = 0; i < M || j < N; ) {		start_i = i;		start_j = j;                run = es_rep_len(S, &opn, seq1+beg1+i-1, seq2+beg2+j-1,		  &not_used);                i += run; j += run; 		save_block(beg1+start_i-1, beg2+start_j-1,				  beg1+i-2, beg2+j-2, mptr);                if (i < M || j < N)                  es_indel_len(S, &opn, &i, &j);	    	}	a = ckalloc(sizeof(align_t));	a->script = S;	a->beg1 = beg1; a->beg2 = beg2; 	a->end1 = end1; a->end2 = end2;	a->seq1 = seq1; a->seq2 = seq2;	a->score = align_IO->score;	a->next_align = NULL;	return a;}uchar *reverse_seq(uchar *s, int len){	uchar *p = ckalloc(len+1), *t;	for (t = p + len - 1; t >= p; --t)		*t = *s++;	p[len] = '\0';	return p;}align_t *bz_extend_msps(SEQ *sf1, uchar *rf1, SEQ *sf2, msp_table_t *mt,  gap_scores_t *ds, ss_t ss, int Y, int K, TBack tback){	int i, *sp[NACHARS], numMSPs = MSP_TAB_NUM(mt);	double scor;	AlignIOPtr align_IO;	Msp_ptr mp, *msp, mptr, order_beg_inc = NULL,	  order_end_dec = NULL;	align_t *head, *last;	block_ptr bp, bq;	mt = msp_sort(mt);	/* sort by decreasing score */	msp = augment_msp_table(mt, sf1, sf2, ss);	align_IO = ckalloc(sizeof(*align_IO));	for (i = 0; i < NACHARS; i++)		sp[i] = ss[i];	align_IO->subscore = sp;	align_IO->len2 = SEQ_LEN(sf2);	align_IO->len1 = SEQ_LEN(sf1);	align_IO->gap_open = ds->O;	align_IO->gap_extend = ds->E;	align_IO->x_parameter = Y;	align_IO->seq1 = SEQ_CHARS(sf1);	align_IO->seq2 = SEQ_CHARS(sf2);	align_IO->seq1_r = rf1;	/* align_IO->seq1_r = reverse_seq(SEQ_CHARS(sf1), SEQ_LEN(sf1)); */	align_IO->seq2_r = reverse_seq(SEQ_CHARS(sf2), SEQ_LEN(sf2));	if (tback == NULL)		fatal("bz_extend_msps got a NULL tback pointer.");	align_IO->tback = tback;	/* Special case: trivial alignment in a self-comparison. */	if (SEQ_LEN(sf1) == SEQ_LEN(sf2)) {		uchar *s = SEQ_CHARS(sf1), *t = SEQ_CHARS(sf2), S, T;		for (scor = i = 0; i < SEQ_LEN(sf1); ++i) {			S = toupper(s[i]);			T = toupper(t[i]);			scor += ss[S][T];			if (S != T)				break;		}		if (i == SEQ_LEN(sf1)) {   /* seq1 and seq2 are identical */			align_t *a;			mp = msp[numMSPs];			mp->pos1 =  mp->pos2 = 0;			mp->end1 = mp->end2 = SEQ_LEN(sf1)-1;			mp->up_align1 = mp->up_align2 = mp->down_align1 =			  mp->down_align2 = NULL;			mp->up_block1 = mp->up_block2 = mp->down_block1 =			  mp->down_block2 = NULL;			mp->first_block = NULL;			save_block(0, 0, mp->end1, mp->end2, msp[numMSPs]);			insert_align(mp, &order_beg_inc, &order_end_dec);			mp->last_block = mp->first_block;			mp->first_block->prev_block =			  mp->last_block->next_block = NULL;			a = mp->align = ckalloc(sizeof(align_t));			a->script = edit_script_new();			(void)edit_script_rep(a->script, SEQ_LEN(sf1));			a->beg1 = a->beg2 = 1;			a->end1 = a->end2 = SEQ_LEN(sf1);			a->seq1 = a->seq2 = SEQ_CHARS(sf1);			if (scor < (float)K)				a->score = K;	// so it won't be discard			else if (scor > (float)INT_MAX)				a->score = INT_MAX;			else				a->score = scor;			a->next_align = NULL;		}	}	/* Loop over the (non-trivial) MSPs. */	for (i = 0; i < numMSPs; i++) {		mptr = msp[i];		if (!msp_updown(order_beg_inc, mptr))			continue;		align_IO->seed1 = mptr->pos1;		align_IO->seed2 = mptr->pos2;		get_closest(mptr->pos1, align_IO, order_beg_inc, order_end_dec);		align_IO->Up_align = mptr->up_align1;		align_IO->Down_align = mptr->down_align1;		align_IO->Up_block = mptr->up_block1;		align_IO->Down_block = mptr->down_block1;		Align(align_IO);		mptr->align = format_alignment(align_IO, mptr);		mptr->pos1 = align_IO->start1;		mptr->pos2 = align_IO->start2;		mptr->end1 = align_IO->stop1;		mptr->end2 = align_IO->stop2;		if (mptr->first_block) { /* HOW CAN THIS BE FALSE??? */			mptr->last_block = mptr->first_block->prev_block;			mptr->first_block->prev_block =			   mptr->last_block->next_block = NULL;			align_updown(order_beg_inc, mptr);			insert_align(mptr, &order_beg_inc, &order_end_dec);		} 	}	/* Link the alignments for return to the calling procedure. */	head = last = NULL;	for (mp = order_beg_inc; mp; mp = mp->next_msp)		if (mp->align->score >= K) {			if (head == NULL)				head = last = mp->align;			else				last = last->next_align = mp->align;		}	/* Clean up and return. */	for (mp = order_beg_inc; mp; mp = mp->next_msp)		for (bp = mp->first_block; bp; bp = bq) {			bq = bp->next_block;			ckfree(bp); 		}	ckfree(msp[0]);	ckfree(msp);	// not ours: ckfree(align_IO->seq1_r);	ckfree(align_IO->seq2_r);	ckfree(align_IO);	return head;}	

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -