roc.txt

来自「EM算法的改进」· 文本 代码 · 共 213 行

TXT
213
字号
#!/bin/csh
#
# $Id: roc.txt 1339 2006-09-21 19:46:28Z tbailey $
# $Log$
# Revision 1.2  2005/10/05 06:18:35  nadya
# use full path for "rm". Assume everybody has /bin/rm.
#
# Revision 1.1.1.1  2005/07/30 01:19:47  nadya
# Importing from meme-3.0.14, and adding configure/make
#
#
# roc -- compute ROC statistics for the motifs in a learner (MEME) output
# file and optionally plot the ROC curves.
#
# Steps:
#   1. parse the command line (see the USAGE text below);
#   2. run make_logodds on the output file to get a log-odds matrix file,
#      the alphabet and (if reported) the name of the training set;
#   3. run seqroc (with -seq or -opt) or siteroc (default) on the test set;
#   4. with -s or -ps, split the plot data with gawk and call plotgen;
#   5. remove every temporary file named $pgm.*.$$.tmp.
#
# Exit status: 0 on success, 1 on a usage error, a missing input file or
# a make_logodds failure.
#

set pgm = roc
# Status returned by the common cleanup code at the end of the script.
set exit_status = 0

#
# set the directories we need
#
setenv MEME_DIRECTORY @MEMEDIR@
set bin = @MEMEDIR@/bin

#
# check for no arguments
#
if ($#argv < 1) then
  usage:
  more << USAGE
  USAGE:
	$pgm <exp> [-d <dataset>] [-seq] [-ps] [-s] [-m <motif_file>] [-c <c>]
	  [-opt] [-t <thresh>] [-T <thresh>] [-debug]

	<exp>	learner output file (prepends "RESULTS/" if not found)
	[-d <dataset>]	dataset to compute ROC on
	[-m <motif_file>]
			file (without .motifs or .tag extension)
			containing known motifs; default is <training_set>
			where <training_set> must be  specified in <exp> file.
	[-seq]	compute sequence-level performance using 
			<motif_file>.tag
			default is site-level performance using
			<motif_file>.motifs
	[-s]	plot the ROC curves to the screen
	[-ps]	create a plot of the ROC curves to a .ps file
			Creates files: 
				RESULTS/roc.<exp>.ps
				RESULTS/roc.<exp>.tex
				tmp.tex
	[-opt]	optimize the recall
	[-t <thresh>]
			Print "none" for motif name if best ROC below <thresh>
	[-c <c>]	output only first <c> motifs
	[-T <thresh>]	threshold to use instead of MEME threshold
	[-debug]	run using debugger

	Note1: Looks for training set used by learner unless you
		use -d <dataset>.  Should be in same directory as 
		when meme was run!
	Note2: Looks for file <training_set>.motifs unless you use -seq,
		in which case it looks for <training_set>.tag.  
		File should be in same directory as dataset file used
		when learner was run.
	Note3: If the motifs or dataset aren't found, looks for them
	       in directory datasets/.
USAGE
  exit 1
endif

# From here on an interrupt must still remove the temporary files.
onintr cleanup

# get input arguments
set exp = $1
shift
set threshold = "0"
set c_sw = ""
set opt = ""
set T = ""
while ("$1" != "")
  switch ($1)
  case -d:
    shift
    # An option that needs a value must not be the last word.
    if ($#argv < 1) goto usage
    set test_set = $1
    breaksw
  case -seq:
    set seq_level = 1
    breaksw
  case -s:
    set s = "-s"
    set ps = ""
    breaksw
  case -ps:
    set ps = "-ps $pgm.$exp"
    breaksw
  case -t:
    shift
    if ($#argv < 1) goto usage
    set threshold = $1
    breaksw
  case -T:
    shift
    if ($#argv < 1) goto usage
    set T = "-T $1"
    breaksw
  case -m:
    shift
    if ($#argv < 1) goto usage
    set motif_file = $1
    breaksw
  case -c:
    shift
    if ($#argv < 1) goto usage
    set c_sw = "-c $1"
    breaksw
  case -opt:
    set opt = "-opt"
    breaksw
  case -debug:
    set debug = 1
    breaksw
  default:
    goto usage
  endsw
  shift
end

# check that the meme output file exists
set memefile = $exp
if (!(-e $memefile)) then
  set memefile = RESULTS/$memefile
  if (!(-e $memefile)) then
    echo Meme output file \"$memefile\" not found!
    exit 1
  endif
endif

# create the log_odds matrix file
set logodds = $pgm.logodds.$$.tmp
set tmp = `$bin/make_logodds $memefile $logodds $c_sw $T`
if ($status || $#tmp < 1) then
  # print the error message from make_logodds, one sentence per line
  echo $tmp | awk '{ \
    for (i=1; i<=NF; i++) { \
      last = substr($i,length($i),1);\
      if (last == ".") printf "%s\n", $i; else printf "%s ", $i; \
    } \
  }'
  # make_logodds may already have created its output file
  set exit_status = 1
  goto cleanup
endif
# first word is the alphabet; the optional second word is the training set
set alpha = $tmp[1]
if ($#tmp == 2) then
  set dataset = $tmp[2]
endif

# set up dataset to search; default is same one as meme was
# run on.
if (! $?test_set) then
  if (! $?dataset) then
    echo "No training set named in $memefile; use -d <dataset>."
    set exit_status = 1
    goto cleanup
  endif
  set test_set = $dataset
endif
if (!(-e $test_set)) set test_set = datasets/$test_set

# determine prefix of <motif_file>
if (! $?motif_file) then
  if (! $?dataset) then
    echo "No training set named in $memefile; use -m <motif_file>."
    set exit_status = 1
    goto cleanup
  endif
  set motif_file = $dataset:r
endif

# determine type of performance, sequence- or site-level and
# type of known sequence/site file
if ($?seq_level || "$opt" != "") then
  set roc = "$bin/seqroc"
  set motif_file = $motif_file".tag"
else
  set roc = "$bin/siteroc"
  set motif_file = $motif_file".motifs"
endif
if (!(-e $motif_file)) set motif_file = datasets/$motif_file

# check that motif/tag file exists
if (!(-e $motif_file)) then
  echo File containing motifs \"$motif_file\" not found!
  set exit_status = 1
  goto cleanup
endif

# print a header line
set t = $test_set:t
echo -n "memefile $memefile dataset $t:r motif_file $motif_file"
echo ""

# file to hold data to plot
set plotfile = $pgm.plotfile.$$.tmp

# get the ROC statistics
if ($?debug) then
  # show the command line so it can be pasted into the debugger
  echo $roc $logodds $test_set $motif_file $alpha $threshold $plotfile $opt
  gdb $roc
else
  $roc $logodds $test_set $motif_file $alpha $threshold $plotfile $opt
endif

# quit if not plotting
if (!($?ps) && !($?s)) goto cleanup

# name for files to hold individual plot data
set plotdata = $pgm.plotdata.$$.tmp

# create a gawk script to create separate plot data files and
# write the plotting commands
set gawk = $pgm.gawk.$$.tmp
cat << "END" > $gawk
  {if ($1 == "pass") {
    pass = $2; motif = $4; roc = $6; file = plotdata "." pass;
    printf(" %d_%s_%f %s", pass, motif, roc, file);
  } else { print >> file} }
"END"

# get the files to plot and their labels; running the gawk script also
# creates the separate plotdata files as a side effect
set labels = `gawk -v plotdata=$plotdata -f $gawk $plotfile`

# plot the curves
plotgen $labels $ps -point 20 \
  -title "ROC curves" \
  -xrange -.1 : 1.1 \
  -yrange -.1 : 1.1 \
  -misc "set xtics 0, .1, 1.0" \
  -misc "set ytics 0, .1, 1.0" \
  -xlabel "false positive proportion (fp/(tn + fp))" \
  -ylabel "true positive proportion (tp/(tp + fn))" \
  -key .86 .4 \
  -misc "set noyzeroaxis" \
  -misc "set data style linespoints"
# -misc "set grid"

cleanup:
# There may be nothing to remove (e.g. an early interrupt); do not let an
# unmatched glob abort the script with "No match".
set nonomatch
/bin/rm -f $pgm.*.$$.tmp
exit $exit_status

⌨️ 快捷键说明

复制代码Ctrl + C
搜索代码Ctrl + F
全屏模式F11
增大字号Ctrl + =
减小字号Ctrl + -
显示快捷键?