transfac2meme.txt
来自「EM算法的改进」· 文本 代码 · 共 262 行
TXT
262 行
#!@WHICHPERL@ -w## $Id: transfac2meme.txt 1339 2006-09-21 19:46:28Z tbailey $# $Log$# Revision 1.1 2005/07/28 23:57:08 nadya# Initial revision### FILE: transfac2meme# AUTHOR: William Stafford Noble and Timothy L. Bailey# CREATE DATE: 4/22/99# DESCRIPTION: Convert a Transfac matrix file to MEME output format.# Set up global variables. Assume uniform.@bases = ("A", "C", "G", "T");$num_bases = 4;$bg{"A"} = 0.25;$bg{"C"} = 0.25;$bg{"G"} = 0.25;$bg{"T"} = 0.25;$pseudo_total = 1; # default total pseudocounts$usage = "USAGE: transfac2meme [options] <matrix file> Options: -species <name> -skip <transfac ID> (may be repeated) -ids <file containing list of transfac IDs> -bg <background file> set of f_a -pseudo <total pseudocounts> add <total pseudocounts> times f_a to each freq default: $pseudo_total N.B. Dollar signs in TRANSFAC IDs are converted to underscores.\n";# Process command line arguments.if (scalar(@ARGV) == 0) { printf(STDERR $usage); exit(1);}$species = "";$id_list = "";while (scalar(@ARGV) > 1) { $next_arg = shift(@ARGV); if ($next_arg eq "-species") { $species = shift(@ARGV); } elsif ($next_arg eq "-skip") { $skips{shift(@ARGV)} = 1; } elsif ($next_arg eq "-ids") { $id_list = shift(@ARGV); } elsif ($next_arg eq "-bg") { $bg_file = shift(@ARGV); } elsif ($next_arg eq "-pseudo") { $pseudo_total = shift(@ARGV); } else { print(STDERR "Illegal argument ($next_arg)\n"); exit(1); }}($matrix_file) = @ARGV;# Store the target IDs.%id_list = &read_list_from_file($id_list);# read the background fileif (defined($bg_file)) { open($bg_file, "<$bg_file") || die("Can't open $bg_file.\n"); $total_bg = 0; while (<$bg_file>) { next if (/^#/); # skip comments ($a, $f) = split; if ($a eq "A" || $a eq "a") { $bg{"A"} = $f; $total_bg += $f; } elsif ($a eq "C" || $a eq "c") { $bg{"C"} = $f; $total_bg += $f; } elsif ($a eq "G" || $a eq "g") { $bg{"G"} = $f; $total_bg += $f; } elsif ($a eq "T" || $a eq "t") { $bg{"T"} = $f; $total_bg += $f; } } # make sure they sum to 1 foreach $key (keys %bg) { $bg{$key} /= $total_bg; #printf STDERR "$key $bg{$key}\n"; }} # background file# Open the matrix file for reading.open($matrix_file, "<$matrix_file") || die("Can't open $matrix_file.\n");# Print the MEME header.print("MEME version 3.0\n\n");print("ALPHABET= ACGT\n\n");print("strands: + -\n\n");print("Background letter frequencies (from dataset with add-one prior applied):\n");printf("A %f C %f G %f T %f\n\n", $bg{"A"}, $bg{"C"}, $bg{"G"}, $bg{"T"});# Read the input file.$num_motifs = 0;$num_skipped = 0;while ($line = <$matrix_file>) { # Split the line into identifier and everything else. ($id, @data) = split(' ', $line); # Have we reached a new matrix? if (defined($id) && ($id eq "ID")) { $matrix_name = shift(@data); $matrix_name =~ tr/\$/_/; # Read to the beginning of the motif. $this_species = ""; # Old versions of TRANSFAC use pee-zero; new use pee-oh. while (($id ne "PO") && ($id ne "P0")) { $line = <$matrix_file>; if (! defined($line)) { die ("Can't find PO line for TRANSFAC matrix $matrix_name.\n"); } ($id, @data) = split(' ', $line); # Store the species line. if ($id eq "BF") { $this_species .= $line; } } # Read the motif. $i_motif = 0; while () { $line = <$matrix_file>; chomp($line); if (! defined $line ) { die ("Can't find `XX' line for TRANSFAC matrix $matrix_name.\n"); } ($id, @counts) = split(' ', $line); # Look for the end of the motif. if (($id eq "XX") || ($id eq "//")) { last; } # Make sure we got the right number of entries. if (scalar(@counts) != 5) { die("Invalid motif line ($line)"); } # Store the contents of this row. for ($i_base = 0; $i_base < $num_bases; $i_base++) { $motif{$i_base, $i_motif} = shift(@counts); } $i_motif++; } $width = $i_motif; # Convert the motif to frequencies. for ($i_motif = 0; $i_motif < $width; $i_motif++) { # motif columns may have different counts $num_seqs = 0; for ($i_base = 0; $i_base < $num_bases; $i_base++) { if (!defined($motif{$i_base, $i_motif})) { die("motif{$i_base, $i_motif} is not defined.\n"); } else { $num_seqs += $motif{$i_base, $i_motif}; } } for ($i_base = 0; $i_base < $num_bases; $i_base++) { $motif{$i_base, $i_motif} = ($motif{$i_base, $i_motif} + ($pseudo_total * $bg{$bases[$i_base]}) ) / ($num_seqs + $pseudo_total); } } ###### Decide whether to print the motif. # If no criteria are given, then print it. $print_it = 1; # If we were given a list, if ($id_list ne "") { # was this matrix in the list? $print_it = defined($id_list{$matrix_name}); } # If we were given a species. elsif ($species ne "") { # is this the right species? $print_it = ($this_species =~ m/$species/); if ($this_species eq "") { print(STDERR "Warning: No species given for $matrix_name.\n"); } } # Were we explicitly asked to skip this one? if (defined($skips{$matrix_name})) { $print_it = 0; } # Print the motif. if ($print_it) { $num_motifs++; print(STDERR "Printing motif $matrix_name.\n"); print("MOTIF $num_motifs $matrix_name\n\n"); print("BL MOTIF $num_motifs width=$width seqs=$num_seqs\n"); # PSSM for MAST print("log-odds matrix: alength= $num_bases w= $width n= 0 bayes= 0 E= 0\n"); for ($i_motif = 0; $i_motif < $width; $i_motif++) { for ($i_base = 0; $i_base < $num_bases; $i_base++) { printf("%7.3f ", log( $motif{$i_base, $i_motif} / $bg{$bases[$i_base]} ) /log(2.0) ); } print("\n"); } # PSFM for Meta-MEME print("letter-probability matrix: "); print("alength= $num_bases w= $width nsites= $num_seqs E= 0\n"); for ($i_motif = 0; $i_motif < $width; $i_motif++) { for ($i_base = 0; $i_base < $num_bases; $i_base++) { printf("%7.3f ", $motif{$i_base, $i_motif}); } print("\n"); } print("\n"); } else { $num_skipped++; #print(STDERR "Skipping motif $matrix_name.\n"); } }}print(STDERR "Converted $num_motifs motifs.\n");print(STDERR "Skipped $num_skipped motifs.\n");close($matrix_file);sub read_list_from_file { my($id_list) = @_; my($line, %return_value); if ($id_list eq "") { return(%return_value); } open($id_list, "<$id_list") || die("Can't open $id_list."); while ($line = <$id_list>) { chomp($line); @words = split(' ', $line); foreach $word (@words) { $return_value{$word} = 1; } } close($id_list); return(%return_value);}
⌨️ 快捷键说明
复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?