mast2html

来自「EM算法的改进」· 代码 · 共 781 行 · 第 1/2 页

TXT
781
字号
#!/usr/bin/perl
#
# $Id: mast2html.pl 1339 2006-09-21 19:46:28Z tbailey $
# $Log$
# Revision 1.3  2005/08/23 23:56:33  nadya
# change use of convert2html from module to perl file
# update regexpr string that makes a tag
#
# Revision 1.2  2005/08/12 17:52:20  nadya
# rename 2html into convert2html. Perl needs module name start with alpha char,
# not numeric
#
# Revision 1.1.1.1  2005/07/30 02:00:59  nadya
# Importing from meme-3.0.14, and adding configure/make
#
#

# Helper subroutines used below (print_header, make_button_panel,
# next_section, next_block, format_section, format_para, format_pre,
# format_sec_i, format_diagrams, format_sec_iii, button_help) are not defined
# in the visible part of this file; presumably they, and the globals $SCALE,
# $MAX_DIAGRAM, $THIN_LINE, $FAT_LINE and $brief, come from convert2html.pl
# -- TODO confirm.
use lib qw(/root/lib/perl);
require "/root/lib/perl/convert2html.pl";

$pgm = $0;                      # name of program
$pgm =~ s#.*/##;                # remove part up to last slash
@args = @ARGV;                  # arguments to program
$status = 0;                    # exit status
$SIG{'INT'} = 'cleanup';        # interrupt handler

#
# get directories
#
# NOTE(review): the line breaks inside this here-document were reconstructed
# from a whitespace-collapsed copy of the script; confirm against upstream.
$usage = <<USAGE;		# usage message
  USAGE:
        $pgm [-e_skip <min> <max>]

	[-e_skip <min> <max>]	skip sequences where <max> > e-value > <min>
USAGE

# mast2html:
# Reads MAST output on standard input and writes HTML on standard output.
# Starting with standard MAST output:
#     add external links to NCBI
#     add internal navigation links
#     add html motif diagrams
#
# adapted from the earlier diagram program dh3.pl
#
# v1.00     24 October 1997     Michael Gribskov
# v1.01		18 November 1997    Tim Bailey
#	update to MAST 2.3, include nucleic acid sequences
# v1.02     18 November 1997    Michael Gribskov
#	added buttons
# v1.03		 4 December 1997	Michael Gribskov
#	added handling for non NCBI/gi sequences
# v1.04		 5 December 1997	Tim Bailey
#	converted non-motif regions to lines
#	bug fixes
# v1.05		 5 December 1997	Michael Gribskov
#	bug fixes
#   	reduce HTML table output and format source
# v1.06		16 December 1997	Tim Bailey
#	Make diagrams each a separate table embedded in single table for sec ii.
#	Spacer lines increased in thickness when sequence too long to fit.
#	Weak motifs shown in smaller font.
#	Translated DNA shown at 1/3 scale.
#	Minimum motif width in pixels enforced.
#	Fixed splitting of diagrams in annotation section.
#	Relies on peptide/nucleotide flags printed by MAST for db and motifs.
# v1.07 	17 March 1997		Tim Bailey
#	Fix Netscape 4.0 bug with embedded tables;
#	Required 1) setting WIDTH in embedded table to width + 40 (magic number)
#		 2) adding an invisible last <TD> to give all tables same width
# v1.08 	23 March 1997		Tim Bailey
#	Add -e_skip switches for use in preparing sample MAST output
# v1.09 	30 March 1997		Tim Bailey
#	Add \n after ellipsis; change padding for Netscape 4.0 to
#		4 + 2*#motifs + 3*#spacers
# v1.10
#	change padding for Netscape 4.0 to
#		14 + 2*#motifs + 3*#spacers
#	remove trailing "|" in sequence name in annotation section (PDB bug)
# v2.0	major rewrite for MEME 3.0

# E-value skip range (see -e_skip).  With the defaults MIN > MAX, so no
# e-value can lie strictly between them; presumably nothing is skipped.
$MIN_E_SKIP = 1;	# skip sequences in this evalue range
$MAX_E_SKIP = 0;

# Define the buttons used for internal and external links.
# Each value is "anchor!background-colour!text-colour"; $MBPSUB is a
# placeholder inside the anchor (presumably substituted per sequence by
# make_button_panel -- TODO confirm).
$ENTREZ = "http://www.ncbi.nlm.nih.gov/entrez/query.fcgi";
$MBPSUB = "XXX---XXX";
%buttons = ( 
  "nentrez",
   "<A HREF='$ENTREZ?db=nucleotide&cmd=Search&term=$MBPSUB&doptcmdl=GenPept'>E</A>!#DDDDFF!#000000",
  "pentrez",
   "<A HREF='$ENTREZ?db=protein&cmd=Search&term=$MBPSUB&doptcmdl=GenPept'>E</A>!#DDDDFF!#000000",
  "hentrez", "<A HREF='\#bh'>E</A>!#DDDDFF!#000000",
  "diagram", "<A HREF='\#d$MBPSUB'>D</A>!#DDFFDD!#000000",
  "align",   "<A HREF='\#a$MBPSUB'>A</A>!#FFDDDD!#000000",
  "score",   "<A HREF='\#s$MBPSUB'>S</A>!#DDDD88!#000000",
  "help",    "<A HREF='\#bh'>?</A>!#FFFFFF!#000000",
  "motifs", "<A HREF='\#motifs'><B>Dataset and Motifs</B></A>!#00FFFF!#000000",
  "hm","<A HREF='\#sec_i'><B>High-scoring Sequences</B></A>!#DDFFDD!#000000",
  "md","<A HREF='\#sec_ii'><B>Motif Diagrams</B></A>!#FFDDFF!#000000",
  "ma", "<A HREF='\#sec_iii'><B>Annotated Sequences</B></A>!#00FF00!#000000",
  "debug", "<A HREF='\#debug'><B>Debugging Information</B></A>!#DDDDFF!#000000",
  "top", "<A HREF='\#top_buttons'><B>Go to top</B></A>!#DDDDFF!#000000" 
);

#
# get input arguments
#
while ($#ARGV >= 0) {
  if ($ARGV[0] eq "-e_skip") {
    # the switch needs both <min> and <max>; refuse a truncated command line
    # instead of silently leaving the limits undefined
    if (@ARGV < 3) {
      print STDERR $usage;
      exit(1);
    }
    shift; $MIN_E_SKIP = $ARGV[0];
    shift; $MAX_E_SKIP = $ARGV[0];
  } else {
    print STDERR $usage;
    exit(1);
  }
  shift;
}

# header
#print "<HTML>\n<HEAD>\n<TITLE>MAST</TITLE>\n</HEAD>\n<BODY BGCOLOR=$BODY>\n";
&print_header("MAST");

# top button panel
$button = make_button_panel("!", \%buttons, "motifs", "hm", "md", "ma","debug");
$button = "<A NAME=top_buttons></A><HR>\n$button\n<BR CLEAR=LEFT>\n";

# title
$line = &next_section();			# get start of MAST output

# detect error: first word must be "MAST" 
if ($line !~ /^MAST/) {			# MAST error; exit
  # pass the remaining input through verbatim so the error text is visible
  print "<PRE>$line";
  while (<STDIN>) { print; } 
  print "</PRE>";
  exit(1);
} else {					# no error; print buttons
  print $button;
}
#$line = &format_section( "", "", $line );
$line = &format_section( "", $line );
print "$line\n";
$line = &next_block();
# turn bare URLs into links ($1, not \1, is the capture in a replacement)
$line =~ s{(http:.*)}{<A HREF='$1'>$1</A>}g;
$line = &format_para( $line );
print "$line\n";

# reference
$line = &next_section();
#$line = &format_section( "", "", $line );
$line = &format_section( "", $line );
print "$line\n";
$line = &next_block();
$line = &format_para( $line );
print "$line\n";

# database and motifs
$line = &next_section();
$line = &format_section( "", "", $line, "motifs" );
print "$line\n";
$line = &next_block();
%width = &read_motif( $line );

# figure out type of database and motifs
if ($line =~ /DATABASE[^\n]*(nucleotide)/) { $db = "n"; } else { $db = "p"; }
if ($line =~ /MOTIF[^\n]*(nucleotide)/) { $mt = "n"; } else { $mt = "p"; }

# decrease number of pixels per database letter if motifs are protein and DB DNA
$xlate = $db eq "n" && $mt eq "p";				# translating DNA?
if ($xlate) { $SCALE *= 3; }

# figure out how are DNA strands scored
# (string comparison: with "==" both "n" and "p" are numerically 0, so the
# protein branch could never be taken)
if ($db eq "n") {			# DNA
  if ($line =~ /are combined/) { $stype = "c"; }		# combined
  if ($line =~ /are scored separately/) { $stype = "s"; }	# separate
  if ($line =~ /are not scored/) { $stype = "n"; }		# norc
} else {				# protein
  $stype = "p";				# protein
}
$line = &format_pre( $line );
print "$line\n";

# Section I introduction
if (!$brief) {
  $line = &next_section();
  $line = &format_section( "", "", $line, "sec_i" );
  print "$line\n";
  $line = &next_block();
  $line = &format_list( $line, "-o" );
  print "$line\n";
} else {			# header already read
  $line = &format_section( "", "", $line, "sec_i" );
  print "$line\n";
}

# Section I
$line = &next_block( "---" );
$line = &next_block();
$line = &format_sec_i( $line, $db, $stype, $xlate );
print "$line\n";

# Section II introduction
$line = &next_section();
$line = &format_section( "", "", $line, "sec_ii" );
print "$line\n";
if (!$brief) {
  $line = &next_block( );				# get introduction
  # remove "-d-" line
  $line =~ s/o -d-.+//; 
  # remove occurrence ... occurrence line
  $line =~ s/occurrence.+occurrence//;
  # replace [snf] and <snf> with snf in appropriate font
  $thdr = "<TABLE SUMMARY='intro' BORDER=0 CELLPADDING=0><TR ALIGN=CENTER>";
  $spacer = "<TD WIDTH=10><HR SIZE=$THIN_LINE NOSHADE>";
  $fspacer = "<TD WIDTH=10> <HR SIZE=$FAT_LINE NOSHADE>";
  $block = "<TD CLASS='c1' WIDTH=20>";
  $wblock = "<TD CLASS='cw1' WIDTH=20>";
  $line =~ s{\[(\w+)\]}{$thdr $spacer $block $1 $spacer</TABLE>};
  $line =~ s{<(\w+)>}{$thdr $spacer $wblock $1 $spacer</TABLE>};
  $max_len = int($MAX_DIAGRAM*$SCALE);
  # create a line talking about long sequences and thick lines
  $line .= "o $thdr $fspacer $block 1 $fspacer </TABLE> ";
  $line .= "Sequences longer than $max_len are not shown to scale and are indicated by thicker lines.\n";
  # make an HTML list out of the introduction
  $line = &format_list( $line, "-o" );
  print "$line\n<BR>\n";
} # print section II introduction

# Section II
$line = &next_block();
$line = &format_diagrams( $SCALE, $MAX_DIAGRAM, $line, $db, $stype, $xlate,
  1, "Expect", 1, ' ');
print "$line\n";

# Section III introduction
$line = &next_section();
$line = &format_section( "", "", $line, "sec_iii" );
print "$line\n";
if (!$brief) {
  $line = &next_block();
  $line = &format_list( $line, "-o" );
  print "$line\n<HR>\n";
}

# Section III
$line = &next_block();
$line = &format_sec_iii( $db, $line );
print "$line\n";

# everything else
print "<A NAME=debug></A>";
print "<HR><CENTER><H3>Debugging Information</H3></CENTER><HR>\n";
print "<PRE>\n";
while ( <STDIN> ) { print; }
print "</PRE>\n";

# help for buttons
$line = &button_help;
print "$line\n";

# button to top
$button = make_button_panel("!", \%buttons, "top");
print "<HR>$button<BR>\n";

# finish off HTML
print "</BODY>\n</HTML>\n";

# cleanup files
# note: "if ($status == 130) {cleanup(1);}" must follow $status = system(...)
&cleanup($status);

#-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
#-*  SUBROUTINES
#-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*
 
#------------------------------------------------------------------------------
# Cleanup any temporary files
#
# Exits the program.  When installed as the INT signal handler it receives
# the signal name "INT" and exits with status 1; otherwise the argument is
# used as the exit status.
#------------------------------------------------------------------------------
sub cleanup {
  if ($_[0] eq "INT") { exit(1); } else { exit($_[0]); }
}

#------------------------------------------------------------------------------
# read_motif:
# read the motif widths
#
# Scans the text for the first line containing "----- ", then reads the lines
# that follow it up to the first blank line.  The first two whitespace-
# separated fields of each such line are taken as motif number and width.
#
# USAGE: %width = &read_motif( text );
#
# Returns a hash mapping motif number to width (empty if no "----- " line is
# found).
#------------------------------------------------------------------------------
sub read_motif {
	my ( $text ) = @_;
	my %width;		# lexical, so repeated calls do not accumulate
	my $in_table = 0;	# set once the "----- " separator has been seen

	for my $row ( split /\n/, $text ) {
		if ( !$in_table ) {
			$in_table = 1 if $row =~ /----- /;
			next;		# the separator line itself carries no data
		}
		last if $row =~ /^\s*$/;
		my ( $num, $width ) = split( ' ', $row );
		$width{$num} = $width;
	}

	return( %width );
}

#------------------------------------------------------------------------------
# format_def:
# Add HTML formatting for a definition block.  
#
# USAGE: $new_text = &format_def( text, term_marker, definition_marker );
#
#   text               text to wrap in <DL> ... </DL>
#   term_marker        pattern marking a term; "<DT>" is inserted in front of
#                      every match (the marker string itself is re-emitted)
#   definition_marker  pattern marking a definition; every match is replaced
#                      by "<DD>"
#
# Both markers are interpolated into regular expressions unescaped, so any
# regex metacharacters they contain are significant.
#------------------------------------------------------------------------------
sub format_def {
	my ( $text, $markterm, $markdef ) = @_;

	my $html = "<DL>\n$text</DL>";
	$html =~ s/$markterm/<DT>$markterm/g;
	$html =~ s/$markdef/<DD>/g;

	return( $html );
}

#------------------------------------------------------------------------------
# format_tab:
# format the text into a table with the specified field widths.  Text is
# broken at white space.
#
# Each input line becomes one table row; the first N whitespace-separated
# fields of the line fill the N cells (missing fields give empty cells).
# The table WIDTH is the sum of the column widths.
#
# USAGE: $text = &format_tab( N, [N widths], text );
#        $text = &format_tab( 2, 100, 150, text );
#------------------------------------------------------------------------------
sub format_tab {
	my ( $ncol, @rest ) = @_;

	# the first $ncol remaining arguments are widths, the next one is the text
	my @wid = @rest[ 0 .. $ncol - 1 ];
	my $text = $rest[$ncol];

	my $wid_sum = 0;
	$wid_sum += $_ for @wid;

	my $out = "<TABLE SUMMARY='format_tab' BORDER=0 CELLPADDING=0 WIDTH=$wid_sum>\n";

	# one row per input line
	for my $row ( split /\n/, $text ) {
		$out .= "     <TR>\n";
		my @field = split /\s+/, $row;
		for my $i ( 0 .. $ncol - 1 ) {
			$out .= " <TD WIDTH=$wid[$i]>$field[$i]\n";
		}
	}
	$out .= "</TABLE>\n";

	return $out;
} # format_tab

# NOTE(review): this copy of the file is truncated inside format_list.  The
# collapsed original text of its header and opening statements is kept
# verbatim on the next line until the rest of the subroutine is restored.
#------------------------------------------------------------------------------# format_list:# format the text as a bulletted list using the characters in mark as markers# for the list items.## USAGE: $line = &format_list( text, mark );#------------------------------------------------------------------------------sub format_list {	local( $text, $mark ) = @_;	local( @markers, $m, $newm, $oldm, @line, $l, $i, $out );

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?