meme2html.pl

来自「EM算法的改进」· PL 代码 · 共 566 行 · 第 1/2 页

PL
566
字号
  	  $line .= "</SELECT>\n";         } # dna      } elsif ($name =~ /Motif \d+ regular expression/) {	# regular expression	$line1 = &format_hidden( "$section_name$motif_num", $line );        $line1 =  $line;      } else {		# other sections	$line = &format_pre( $line );      }      print "$line\n";    }    $motif_num++;  } # motif or summary section  # print the time lines (or CPU if last block)  $line = &next_block();  $line = &format_pre( $line );  print "$line\n";}## print the documentation section#$docfile = "$MEME_DIR/etc/meme-explanation.html";open(IN, "<$docfile") || die("Can not open file $docfile: $!\n");while (<IN>) { print; }close(IN);## Print the end of the HTML file.#print(&make_end);# cleanup files# note: "if ($status == 130) {cleanup(1);}" must follow $status = system(...)&cleanup($status);#-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*#-*  SUBROUTINES#-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*#------------------------------------------------------------------------------# Cleanup any temporary files#------------------------------------------------------------------------------sub cleanup {  #system "rm $pgm.$$.*.tmp";  if ($_[0] eq "INT") { exit(1); } else { exit($_[0]); }} # cleanup #------------------------------------------------------------------------------# 	format_description##	Format the Motif Description section of a MEME document.#	Prints the probability matrix, information content diagram#	and multilevel consensus as a single table with the columns #	aligned.#	USAGE: format_description( line, ext, f )#------------------------------------------------------------------------------sub format_description {  my(    $line, 					# lines of text to format    $ext, 					# motif number to append to name    $f						# font to use  ) = @_;  my($i, $x, $w);  my($result, @lines, @fields, $nf, $head, $letter, $label, $col, $mat, $diag);  my($cons, $extra, $bold, $unbold, $fsize);  @lines = split(/\n/, $line);			# break at newline  $result = "<TABLE SUMMARY='motif description' CELLPADDING=0 CELLSPACING=1>\n";  $extra = "<TD COLSPAN=8>";			# unused extra columns  #  # parse simplified probability matrix  #  undef(%majority);  for ($i=0; $line = $lines[$i]; $i++) {	# until blank line    @fields = split(/\s+/, $line);    $nf = @fields;				# number of fields    $head = join(" ", @fields[0..$nf-3]);	# the matrix heading    $letter = $fields[$nf-2];			# the letter    $mat = $fields[$nf-1];			# the matrix line    if ($head =~ /Simplified/) {       $head = "<A NAME='simplified$ext' HREF=\"#simplified_doc\">$head</A>";    } elsif ($head =~ /pos\.|prob|matrix/) {      $head = "<A HREF=\"#simplified_doc\">$head</A>";    }    $result .= "<TR>$extra<TH>$head<TH ALIGN=RIGHT>$fsize$letter<TH>";    $nfields = 0;    # get color group for amino acid    if ($db eq "protein") {			# get amino color group      $c = get_color($db, $letter);      $colors{$c} = 1;    }    foreach $x (split(//, $mat)) {      $y = $x; $y = 0 if ($x eq ":"); $y = 10 if ($x eq "a");      if ($db eq "dna") {			# get majority DNA letter        if ($y > 5) { $majority{$nfields} = $letter; }       } else {					# record freq of amino category        $majority{$nfields . $c} += $y;       }       $result .= ($nfields++ % 6 == 0) ? "<TD \n  >" : "<TD>";      if ($x eq ":") {        $result .= ":";				# matrix columns      } else {        $result .= "$fsize$x";			# matrix columns      }    }    $result .= "\n";			# end of matrix line  } # probability matrix  #  # parse information content diagram  #  $result .= "<TR><TD CLASS='invisible'>.</TD>\n";	# empty row  for ($i++; $line = $lines[$i]; $i++) {	# until blank line    $line =~       /(\s+bits\s+|\S+\s|\(\d+\.\d+ bits\)|\s+|)\s*(\d+\.\d+) ([ \-\*]+)/;    $head = $1;    $label = $2;    $diag = $3;    $col = index($line, "-") if $diag =~ /^\-/;	# save column for next    if ($head =~ /^\s*bits/) {       $head = "<A NAME='IC$ext'>$fsize $head</A>";    } elsif ($head =~ /Information|content/) {      $head = "<A HREF=\"#IC_doc\">$fsize $head</A>";    }    $result .= "<TR>$extra<TH>$fsize $head<TH ALIGN=RIGHT>$label<TH>";    if ($diag =~ /^-/) {			# last row      $result .= "<TD COLSPAN=" . length($diag) . "> <HR>";    } else {      $nfields = 0;      foreach $x (split(//, $diag)) {	# get color 	if ($x eq "*") {          $color = "black";	  if ($db eq "protein") {		# get majority amino category	    foreach $c (sort keys(%colors)) { 	      if ($majority{$nfields . $c} > 5) { $color = $c; }	    }	  } else {				# get DNA color	    $color = get_color($db, $majority{$nfields});	  }	} # get color        $result .= ($nfields++ % 3 == 0) ? "<TD\n  " : "<TD";	if ($x eq "*") {	  $result .= " BGCOLOR=\"$color\">&nbsp;";	} else {	  $result .= ">"; 	}      }    }    $result .= "\n";			# end of plot line  } # info content diagram  #  # parse multilevel consensus sequence  #  $result .= "<TR><TD CLASS='invisible'>.</TD>\n";	# empty row  $bold = "<B>";  $unbold = "</B>";  for ($i++; $line = $lines[$i]; $i++) {	# until blank line    $head = substr($line, 0, $col);    $cons = substr($line, $col);    if ($head =~ /Multilevel/) {       $head = "<A NAME='consensus$ext' HREF=\"#consensus_doc\">$head</A>";    } elsif ($head =~ /consensus|sequence/) {      $head = "<A HREF=\"#consensus_doc\">$head</A>";    }    $result .= "<TR>$extra<TH>$head<TD><TH>";    $nfields = 0;    foreach $x (split(//, $cons)) {      $color = get_color($db, $x);      $result .= ($nfields++ % 2 == 0) ? "<TD CLASS=$color\n  >" : "<TD CLASS=$color>";      if ($x ne " ") {	$result .= "<TT>$bold$x$unbold</TT>";       }      $result .= "</TD>";    }    $result .= "\n";			# end of consensus line    $bold = $unbold = "";  } # multilevel consensus  return( $result );} # format_description#------------------------------------------------------------------------------# 	format_sorted_sites##	Add the aligned sites to the table created by format_description##	USAGE: format_sorted_sites( line, ext, f )#------------------------------------------------------------------------------sub format_sorted_sites{  my(    $line, 					# lines of text to format    $ext, 					# motif number to append to name    $f						# font to use  ) = @_;  my($i, @lines, $cols, $result);  my($seq, $str, $pv, $pre, $site, $post, $sp, $fsize);  @lines = split(/\n/, $line);			# break at newline  $line = $lines[0];				# first line  $cols = ($line =~ /Strand/) ? 7 : 6;		# number of columns  $str = "";					# if no strands  $sp = "&nbsp;";				# space between columns  #  # print aligned sites  #  for ($i=2; $line = $lines[$i]; $i++) {	# skip first two (hdr) lines    if ($cols == 6) {      ($seq, $start, $pv, $pre, $site, $post) = split(/\s+/, $line);    } else {      ($seq, $str, $start, $pv, $pre, $site, $post) = split(/\s+/, $line);    }    $pre = "" if ($pre eq ".");			# remove placeholder $pre    # print header first time thru    if ($i==2) {      $result .= "<TR><TD CLASS='invisible'>.</TD>\n";	# empty row      $result .= "<TR><TH ALIGN=LEFT>NAME<TH>$sp";      $len = length($site);      $result .= ($cols == 6) ?  "<TH>$sp<TH>$sp" : "<TH>STRAND<TH>$sp";      $result .= "<TH>START<TH>$sp<TH>P-VALUE<TH>$sp";      $result .= "<TH>$sp<TH>$sp<TH>$sp";			# pre      $result .= "<TH><TH COLSPAN=$len ALIGN=CENTER>";	# site      $result .= "<A NAME='sites$ext' HREF=\"#sites_doc\">SITES</A>";      $result .= "<TH>$sp\n";			# post    } # header    $result .= "<TR><TD>$seq<TD>";    $result .= "<TD ALIGN=CENTER>$str<TD>";    $result .= "<TD ALIGN=RIGHT>$start<TD>";    $result .= "<TD ALIGN=RIGHT>$pv<TD>\n";    $result .= "  <TD COLSPAN=2 ALIGN=RIGHT>$fsize<TT>$pre</TT><TD>";    $nfields = 0;    foreach $x (split(//, $site)) {		# print site in columns      $color = get_color($db, $x);      $result .= ($nfields++ % 2 == 0) ? "<TD CLASS=$color\n  >" : "<TD CLASS=$color>";      $result .= "<TT><B>$x</B></TT>";      $result .= "</TD>";    }    $result .= "<TD><TD><TD ALIGN=LEFT>$fsize<TT>$post</TT>\n";  } # sorted site  $result .= "</TABLE>";  return($result); } # format_sorted_sites#------------------------------------------------------------------------------# 	format_training set##	Format the training set description and set the %SEQNO global variable.##------------------------------------------------------------------------------sub format_training_set{  my( $line ) = @_;  my( $i, @lines, $nlines, $seqno );  @lines = split /\n/, $line;  $nlines = @lines;  # find the start of the sequence lines  for ($i=0; $i<$nlines; $i++) {    last if ($lines[$i] =~ /^----/);  } # line  # get the sequence names and save in SEQNO  $seqno = 0;                     		# sequence number  for ($i++; $i<$nlines; $i++) {    @words = split(/\s+/, $lines[$i]);    $SEQNO{$words[0]} = $seqno++;    if ($words[3]) { $SEQNO{$words[3]} = $seqno++; }  }  # use <pre> format  return(&format_pre( $line ));} # format_training set

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?