meme2html.pl
来自「EM算法的改进」· PL 代码 · 共 566 行 · 第 1/2 页
PL
566 行
$line .= "</SELECT>\n"; } # dna } elsif ($name =~ /Motif \d+ regular expression/) { # regular expression $line1 = &format_hidden( "$section_name$motif_num", $line ); $line1 = $line; } else { # other sections $line = &format_pre( $line ); } print "$line\n"; } $motif_num++; } # motif or summary section # print the time lines (or CPU if last block) $line = &next_block(); $line = &format_pre( $line ); print "$line\n";}## print the documentation section#$docfile = "$MEME_DIR/etc/meme-explanation.html";open(IN, "<$docfile") || die("Can not open file $docfile: $!\n");while (<IN>) { print; }close(IN);## Print the end of the HTML file.#print(&make_end);# cleanup files# note: "if ($status == 130) {cleanup(1);}" must follow $status = system(...)&cleanup($status);#-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*#-* SUBROUTINES#-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*-*#------------------------------------------------------------------------------# Cleanup any temporary files#------------------------------------------------------------------------------sub cleanup { #system "rm $pgm.$$.*.tmp"; if ($_[0] eq "INT") { exit(1); } else { exit($_[0]); }} # cleanup #------------------------------------------------------------------------------# format_description## Format the Motif Description section of a MEME document.# Prints the probability matrix, information content diagram# and multilevel consensus as a single table with the columns # aligned.# USAGE: format_description( line, ext, f )#------------------------------------------------------------------------------sub format_description { my( $line, # lines of text to format $ext, # motif number to append to name $f # font to use ) = @_; my($i, $x, $w); my($result, @lines, @fields, $nf, $head, $letter, $label, $col, $mat, $diag); my($cons, $extra, $bold, $unbold, $fsize); @lines = split(/\n/, $line); # break at newline $result = "<TABLE SUMMARY='motif description' CELLPADDING=0 CELLSPACING=1>\n"; $extra = "<TD COLSPAN=8>"; # unused extra columns # # parse simplified probability matrix # undef(%majority); for ($i=0; $line = $lines[$i]; $i++) { # until blank line @fields = split(/\s+/, $line); $nf = @fields; # number of fields $head = join(" ", @fields[0..$nf-3]); # the matrix heading $letter = $fields[$nf-2]; # the letter $mat = $fields[$nf-1]; # the matrix line if ($head =~ /Simplified/) { $head = "<A NAME='simplified$ext' HREF=\"#simplified_doc\">$head</A>"; } elsif ($head =~ /pos\.|prob|matrix/) { $head = "<A HREF=\"#simplified_doc\">$head</A>"; } $result .= "<TR>$extra<TH>$head<TH ALIGN=RIGHT>$fsize$letter<TH>"; $nfields = 0; # get color group for amino acid if ($db eq "protein") { # get amino color group $c = get_color($db, $letter); $colors{$c} = 1; } foreach $x (split(//, $mat)) { $y = $x; $y = 0 if ($x eq ":"); $y = 10 if ($x eq "a"); if ($db eq "dna") { # get majority DNA letter if ($y > 5) { $majority{$nfields} = $letter; } } else { # record freq of amino category $majority{$nfields . $c} += $y; } $result .= ($nfields++ % 6 == 0) ? "<TD \n >" : "<TD>"; if ($x eq ":") { $result .= ":"; # matrix columns } else { $result .= "$fsize$x"; # matrix columns } } $result .= "\n"; # end of matrix line } # probability matrix # # parse information content diagram # $result .= "<TR><TD CLASS='invisible'>.</TD>\n"; # empty row for ($i++; $line = $lines[$i]; $i++) { # until blank line $line =~ /(\s+bits\s+|\S+\s|\(\d+\.\d+ bits\)|\s+|)\s*(\d+\.\d+) ([ \-\*]+)/; $head = $1; $label = $2; $diag = $3; $col = index($line, "-") if $diag =~ /^\-/; # save column for next if ($head =~ /^\s*bits/) { $head = "<A NAME='IC$ext'>$fsize $head</A>"; } elsif ($head =~ /Information|content/) { $head = "<A HREF=\"#IC_doc\">$fsize $head</A>"; } $result .= "<TR>$extra<TH>$fsize $head<TH ALIGN=RIGHT>$label<TH>"; if ($diag =~ /^-/) { # last row $result .= "<TD COLSPAN=" . length($diag) . "> <HR>"; } else { $nfields = 0; foreach $x (split(//, $diag)) { # get color if ($x eq "*") { $color = "black"; if ($db eq "protein") { # get majority amino category foreach $c (sort keys(%colors)) { if ($majority{$nfields . $c} > 5) { $color = $c; } } } else { # get DNA color $color = get_color($db, $majority{$nfields}); } } # get color $result .= ($nfields++ % 3 == 0) ? "<TD\n " : "<TD"; if ($x eq "*") { $result .= " BGCOLOR=\"$color\"> "; } else { $result .= ">"; } } } $result .= "\n"; # end of plot line } # info content diagram # # parse multilevel consensus sequence # $result .= "<TR><TD CLASS='invisible'>.</TD>\n"; # empty row $bold = "<B>"; $unbold = "</B>"; for ($i++; $line = $lines[$i]; $i++) { # until blank line $head = substr($line, 0, $col); $cons = substr($line, $col); if ($head =~ /Multilevel/) { $head = "<A NAME='consensus$ext' HREF=\"#consensus_doc\">$head</A>"; } elsif ($head =~ /consensus|sequence/) { $head = "<A HREF=\"#consensus_doc\">$head</A>"; } $result .= "<TR>$extra<TH>$head<TD><TH>"; $nfields = 0; foreach $x (split(//, $cons)) { $color = get_color($db, $x); $result .= ($nfields++ % 2 == 0) ? "<TD CLASS=$color\n >" : "<TD CLASS=$color>"; if ($x ne " ") { $result .= "<TT>$bold$x$unbold</TT>"; } $result .= "</TD>"; } $result .= "\n"; # end of consensus line $bold = $unbold = ""; } # multilevel consensus return( $result );} # format_description#------------------------------------------------------------------------------# format_sorted_sites## Add the aligned sites to the table created by format_description## USAGE: format_sorted_sites( line, ext, f )#------------------------------------------------------------------------------sub format_sorted_sites{ my( $line, # lines of text to format $ext, # motif number to append to name $f # font to use ) = @_; my($i, @lines, $cols, $result); my($seq, $str, $pv, $pre, $site, $post, $sp, $fsize); @lines = split(/\n/, $line); # break at newline $line = $lines[0]; # first line $cols = ($line =~ /Strand/) ? 7 : 6; # number of columns $str = ""; # if no strands $sp = " "; # space between columns # # print aligned sites # for ($i=2; $line = $lines[$i]; $i++) { # skip first two (hdr) lines if ($cols == 6) { ($seq, $start, $pv, $pre, $site, $post) = split(/\s+/, $line); } else { ($seq, $str, $start, $pv, $pre, $site, $post) = split(/\s+/, $line); } $pre = "" if ($pre eq "."); # remove placeholder $pre # print header first time thru if ($i==2) { $result .= "<TR><TD CLASS='invisible'>.</TD>\n"; # empty row $result .= "<TR><TH ALIGN=LEFT>NAME<TH>$sp"; $len = length($site); $result .= ($cols == 6) ? "<TH>$sp<TH>$sp" : "<TH>STRAND<TH>$sp"; $result .= "<TH>START<TH>$sp<TH>P-VALUE<TH>$sp"; $result .= "<TH>$sp<TH>$sp<TH>$sp"; # pre $result .= "<TH><TH COLSPAN=$len ALIGN=CENTER>"; # site $result .= "<A NAME='sites$ext' HREF=\"#sites_doc\">SITES</A>"; $result .= "<TH>$sp\n"; # post } # header $result .= "<TR><TD>$seq<TD>"; $result .= "<TD ALIGN=CENTER>$str<TD>"; $result .= "<TD ALIGN=RIGHT>$start<TD>"; $result .= "<TD ALIGN=RIGHT>$pv<TD>\n"; $result .= " <TD COLSPAN=2 ALIGN=RIGHT>$fsize<TT>$pre</TT><TD>"; $nfields = 0; foreach $x (split(//, $site)) { # print site in columns $color = get_color($db, $x); $result .= ($nfields++ % 2 == 0) ? "<TD CLASS=$color\n >" : "<TD CLASS=$color>"; $result .= "<TT><B>$x</B></TT>"; $result .= "</TD>"; } $result .= "<TD><TD><TD ALIGN=LEFT>$fsize<TT>$post</TT>\n"; } # sorted site $result .= "</TABLE>"; return($result); } # format_sorted_sites#------------------------------------------------------------------------------# format_training set## Format the training set description and set the %SEQNO global variable.##------------------------------------------------------------------------------sub format_training_set{ my( $line ) = @_; my( $i, @lines, $nlines, $seqno ); @lines = split /\n/, $line; $nlines = @lines; # find the start of the sequence lines for ($i=0; $i<$nlines; $i++) { last if ($lines[$i] =~ /^----/); } # line # get the sequence names and save in SEQNO $seqno = 0; # sequence number for ($i++; $i<$nlines; $i++) { @words = split(/\s+/, $lines[$i]); $SEQNO{$words[0]} = $seqno++; if ($words[3]) { $SEQNO{$words[3]} = $seqno++; } } # use <pre> format return(&format_pre( $line ));} # format_training set
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?