📄 blast_seqalign.cpp
字号:
} else { CRef<CDense_seg> dense_seg = x_CreateDenseg(master, slave, starts, lengths, strands); sar->SetSegs().SetDenseg(*dense_seg); } return sar;}static CRef<CSeq_align>x_GapEditBlock2SeqAlign(GapEditBlock* edit_block, const CSeq_id* id1, const CSeq_id* id2){ ASSERT(edit_block != NULL); vector<TSignedSeqPos> starts; vector<TSeqPos> lengths; vector<ENa_strand> strands; bool is_disc_align = false; int nsegs = 0; // number of segments in edit_block->esp for (GapEditScript* t = edit_block->esp; t; t = t->next, nsegs++) { if (t->op_type == eGapAlignDecline) is_disc_align = true; } if (is_disc_align) { /* By request of Steven Altschul - we need to have the unaligned part being to the left if it is adjacent to the gap (insertion or deletion) - so this function will do shuffeling */ x_CorrectUASequence(edit_block); CRef<CSeq_align> seqalign(new CSeq_align()); seqalign->SetType(CSeq_align::eType_partial); seqalign->SetDim(2); // BLAST only creates pairwise alignments bool skip_region; GapEditScript* curr = NULL,* curr_head = edit_block->esp; while (curr_head) { skip_region = false; for (nsegs = 0, curr = curr_head; curr; curr = curr->next, nsegs++){ if (curr->op_type == eGapAlignDecline) { if (nsegs != 0) { // end of aligned region break; } else { while (curr && curr->op_type == eGapAlignDecline) { nsegs++; curr = curr->next; } skip_region = true; break; } } } // build seqalign for required regions only if (!skip_region) { x_CollectSeqAlignData(edit_block, curr_head, nsegs, starts, lengths, strands); CRef<CSeq_align> sa_tmp = x_CreateSeqAlign(id1, id2, starts, lengths, strands, edit_block->translate1 !=0, edit_block->translate2 != 0, edit_block->reverse != 0); // Add this seqalign to the list if (sa_tmp) seqalign->SetSegs().SetDisc().Set().push_back(sa_tmp); } curr_head = curr; } return seqalign; } else { x_CollectSeqAlignData(edit_block, edit_block->esp, nsegs, starts, lengths, strands); return x_CreateSeqAlign(id1, id2, starts, lengths, strands, edit_block->translate1 != 0, edit_block->translate2 != 0, edit_block->reverse != 0); }}/** Get the current position. */static Int4 get_current_pos(Int4* pos, Int4 length){ Int4 val; if(*pos < 0) val = -(*pos + length -1); else val = *pos; *pos += length; return val;}/** This function is used for Out-Of-Frame traceback conversion * Convert an OOF EditScript chain to a SeqAlign of type StdSeg. * Used for a non-simple interval (i.e., one without subs. or * deletions). * The first SeqIdPtr in the chain of subject_id and query_id is * duplicated for the SeqAlign.*/static CRef<CSeq_align>x_OOFEditBlock2SeqAlign(EProgram program, GapEditBlock* edit_block, const CSeq_id* query_id, const CSeq_id* subject_id){ ASSERT(edit_block != NULL); CRef<CSeq_align> seqalign(new CSeq_align()); Boolean reverse = FALSE; GapEditScript* curr,* esp; Int2 frame1, frame2; Int4 start1, start2; Int4 original_length1, original_length2; CRef<CSeq_interval> seq_int1_last; CRef<CSeq_interval> seq_int2_last; CConstRef<CSeq_id> id1; CConstRef<CSeq_id> id2; CRef<CSeq_loc> slp1, slp2; ENa_strand strand1, strand2; bool first_shift; Int4 from1, from2, to1, to2; CRef<CSeq_id> tmp; if (program == eBlastx) { reverse = TRUE; start1 = edit_block->start2; start2 = edit_block->start1; frame1 = edit_block->frame2; frame2 = edit_block->frame1; original_length1 = edit_block->original_length2; original_length2 = edit_block->original_length1; id1.Reset(subject_id); id2.Reset(query_id); } else { start1 = edit_block->start1; start2 = edit_block->start2; frame1 = edit_block->frame1; frame2 = edit_block->frame2; original_length1 = edit_block->original_length1; original_length2 = edit_block->original_length2; id1.Reset(query_id); id2.Reset(subject_id); } strand1 = x_Frame2Strand(frame1); strand2 = x_Frame2Strand(frame2); esp = edit_block->esp; seqalign->SetDim(2); /**only two dimention alignment**/ seqalign->SetType(CSeq_align::eType_partial); /**partial for gapped translating search. */ esp = edit_block->esp; first_shift = false; for (curr=esp; curr; curr=curr->next) { slp1.Reset(new CSeq_loc()); slp2.Reset(new CSeq_loc()); switch (curr->op_type) { case eGapAlignDel: /* deletion of three nucleotides. */ first_shift = false; slp1->SetInt().SetFrom(get_current_pos(&start1, curr->num)); slp1->SetInt().SetTo(MIN(start1,original_length1) - 1); tmp.Reset(new CSeq_id(id1->AsFastaString())); slp1->SetInt().SetId(*tmp); slp1->SetInt().SetStrand(strand1); /* Empty nucleotide piece */ tmp.Reset(new CSeq_id(id2->AsFastaString())); slp2->SetEmpty(*tmp); seq_int1_last.Reset(&slp1->SetInt()); /* Keep previous seq_int2_last, in case there is a frame shift immediately after this gap */ break; case eGapAlignIns: /* insertion of three nucleotides. */ /* If gap is followed after frameshift - we have to add this element for the alignment to be correct */ if(first_shift) { /* Second frameshift in a row */ /* Protein coordinates */ slp1->SetInt().SetFrom(get_current_pos(&start1, 1)); to1 = MIN(start1,original_length1) - 1; slp1->SetInt().SetTo(to1); tmp.Reset(new CSeq_id(id1->AsFastaString())); slp1->SetInt().SetId(*tmp); slp1->SetInt().SetStrand(strand1); /* Nucleotide scale shifted by op_type */ from2 = get_current_pos(&start2, 3); to2 = MIN(start2,original_length2) - 1; slp2->SetInt().SetFrom(from2); slp2->SetInt().SetTo(to2); if (start2 > original_length2) slp1->SetInt().SetTo(to1 - 1); /* Transfer to DNA minus strand coordinates */ if(strand2 == eNa_strand_minus) { slp2->SetInt().SetTo(original_length2 - from2 - 1); slp2->SetInt().SetFrom(original_length2 - to2 - 1); } tmp.Reset(new CSeq_id(id2->AsFastaString())); slp2->SetInt().SetId(*tmp); slp2->SetInt().SetStrand(strand2); CRef<CStd_seg> seg(new CStd_seg()); seg->SetDim(2); CStd_seg::TIds& ids = seg->SetIds(); if (reverse) { seg->SetLoc().push_back(slp2); seg->SetLoc().push_back(slp1); tmp.Reset(new CSeq_id(id2->AsFastaString())); ids.push_back(tmp); tmp.Reset(new CSeq_id(id1->AsFastaString())); ids.push_back(tmp); } else { seg->SetLoc().push_back(slp1); seg->SetLoc().push_back(slp2); tmp.Reset(new CSeq_id(id1->AsFastaString())); ids.push_back(tmp); tmp.Reset(new CSeq_id(id2->AsFastaString())); ids.push_back(tmp); } ids.resize(seg->GetDim()); seqalign->SetSegs().SetStd().push_back(seg); } first_shift = false; /* Protein piece is empty */ tmp.Reset(new CSeq_id(id1->AsFastaString())); slp1->SetEmpty(*tmp); /* Nucleotide scale shifted by 3, protein gapped */ from2 = get_current_pos(&start2, curr->num*3); to2 = MIN(start2,original_length2) - 1; slp2->SetInt().SetFrom(from2); slp2->SetInt().SetTo(to2); /* Transfer to DNA minus strand coordinates */ if(strand2 == eNa_strand_minus) { slp2->SetInt().SetTo(original_length2 - from2 - 1); slp2->SetInt().SetFrom(original_length2 - to2 - 1); } tmp.Reset(new CSeq_id(id2->AsFastaString())); slp2->SetInt().SetId(*tmp); slp2->SetInt().SetStrand(strand2); seq_int1_last.Reset(NULL); seq_int2_last.Reset(&slp2->SetInt()); /* Will be used to adjust "to" value */ break; case eGapAlignSub: /* Substitution. */ first_shift = false; /* Protein coordinates */ from1 = get_current_pos(&start1, curr->num); to1 = MIN(start1, original_length1) - 1; /* Adjusting last segment and new start point in nucleotide coordinates */ from2 = get_current_pos(&start2, curr->num*((Uint1)curr->op_type)); to2 = start2 - 1; /* Chop off three bases and one residue at a time. Why does this happen, seems like a bug? */ while (to2 >= original_length2) { to2 -= 3; to1--; } /* Transfer to DNA minus strand coordinates */ if(strand2 == eNa_strand_minus) { int tmp_int; tmp_int = to2; to2 = original_length2 - from2 - 1; from2 = original_length2 - tmp_int - 1; } slp1->SetInt().SetFrom(from1); slp1->SetInt().SetTo(to1); tmp.Reset(new CSeq_id(id1->AsFastaString())); slp1->SetInt().SetId(*tmp); slp1->SetInt().SetStrand(strand1); slp2->SetInt().SetFrom(from2); slp2->SetInt().SetTo(to2); tmp.Reset(new CSeq_id(id2->AsFastaString())); slp2->SetInt().SetId(*tmp); slp2->SetInt().SetStrand(strand2); seq_int1_last.Reset(&slp1->SetInt()); /* Will be used to adjust "to" value */ seq_int2_last.Reset(&slp2->SetInt()); /* Will be used to adjust "to" value */ break; case eGapAlignDel2: /* gap of two nucleotides. */ case eGapAlignDel1: /* Gap of one nucleotide. */ case eGapAlignIns1: /* Insertion of one nucleotide. */ case eGapAlignIns2: /* Insertion of two nucleotides. */ if(first_shift) { /* Second frameshift in a row */ /* Protein coordinates */ from1 = get_current_pos(&start1, 1); to1 = MIN(start1,original_length1) - 1; /* Nucleotide scale shifted by op_type */ from2 = get_current_pos(&start2, (Uint1)curr->op_type); to2 = start2 - 1; if (to2 >= original_length2) { to2 = original_length2 -1; to1--; } /* Transfer to DNA minus strand coordinates */ if(strand2 == eNa_strand_minus) { int tmp_int; tmp_int = to2; to2 = original_length2 - from2 - 1; from2 = original_length2 - tmp_int - 1; } slp1->SetInt().SetFrom(from1); slp1->SetInt().SetTo(to1); tmp.Reset(new CSeq_id(id1->AsFastaString())); slp1->SetInt().SetId(*tmp); slp1->SetInt().SetStrand(strand1); slp2->SetInt().SetFrom(from2); slp2->SetInt().SetTo(to2); tmp.Reset(new CSeq_id(id2->AsFastaString())); slp2->SetInt().SetId(*tmp); slp2->SetInt().SetStrand(strand2); seq_int1_last.Reset(&slp1->SetInt()); seq_int2_last.Reset(&slp2->SetInt()); break; } first_shift = true; /* If this substitution is following simple frameshift we do not need to start new segment, but may continue old one */ if(seq_int2_last) { get_current_pos(&start2, curr->num*((Uint1)curr->op_type-3)); if(strand2 != eNa_strand_minus) { seq_int2_last->SetTo(start2 - 1); } else { /* Transfer to DNA minus strand coordinates */ seq_int2_last->SetFrom(original_length2 - start2); } /* Adjustment for multiple shifts - theoretically possible, but very improbable */ if(seq_int2_last->GetFrom() > seq_int2_last->GetTo()) { if(strand2 != eNa_strand_minus) { seq_int2_last->SetTo(seq_int2_last->GetTo() + 3); } else { seq_int2_last->SetFrom(seq_int2_last->GetFrom() - 3); } if (seq_int1_last.GetPointer() && seq_int1_last->GetTo() != 0) seq_int1_last->SetTo(seq_int1_last->GetTo() + 1); } } else if ((Uint1)curr->op_type > 3) { /* Protein piece is empty */
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -