📄 roc.pm

📁 A Perl module implementing receiver-operator-characteristic (ROC) curves with nonparametric confid
💻 PM
📖 第 1 页 / 共 2 页
字号:
12 下一页
#LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLL##  ROC.pm - A Perl module implementing receiver-operator-characteristic (ROC)#           curves with nonparametric confidence bounds##     Copyright (c) 1998 Hans A. Kestler. All rights reserved.#     This program is free software; you may redistribute it and/or#     modify it under the same terms as Perl itself.##  This code implements a method for constructing nonparametric confidence#  for ROC curves described in     #        R.A. Hilgers, Distribution-Free Confidence Bounds for ROC Curves, #        Meth Inform Med 1991; 30:96-101#  Additionally some auxilliary functions were ported (and corrected) from #  Fortran (Applied Statistics, ACM).##  Written in Perl by Hans A. Kestler.#  Bugs, comments, suggestions to: #              Hans A. Kestler <hans.kestler@uni-ulm.de>#                              <h.kestler@ieee.org>###LLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLLpackage Statistics::ROC;require 5;use Carp;use strict;use vars qw($VERSION @ISA @EXPORT @EXPORT_OK);require Exporter;@ISA = ('Exporter');@EXPORT = qw(     roc rank loggamma betain Betain xinbta Xinbta	);$VERSION = '0.01';# Algorithm 291, Logarithm of the gamma function. #                in Collected Algorithms of the ACM, Vol II, 1980# M.C. Pike and I.D. Hill with remark by M.R. Hoare# see also Pike, M.C. and Hill, I.D. (1966). Algorithm 291. Logarithm of the#          gamma function. Commun. Ass. Comput. Mach., 9,684.sub loggamma($){    # This procedure evaluates the natural logarithm of gamma(x) for all    # x>0, accurate to 10 decimal places. Stirlings formula is used for the    # central polynomial part of the procedure        my $x= shift; # default arg is @_    my ($f, $z);            if($x==0){return 743.746924740801} # this is: loggamma(9.9999999999E-324)        if($x < 7)    {       for($z=$x,$f=1;$z<7;$z++)       {       $x=$z; $f*=$z;       }       $x++;       $f= -log($f); # log returns the natural logarithm    }    else{ $f=0;}    $z=1/($x*$x);    return $f+($x-.5)*log($x)-$x+.918938533204673+       (((-.000595238095238*$z+.000793650793651)*$z-           .002777777777778)*$z+.083333333333333)/$x;}# Algorithm AS 63 with remark AS R19, # Computes incomplete beta function ratio #  K.L. Majumder and G.P. Bhattacharjee (1973). The Incomplete Beta Integral,#  Appl. Statist.,22:409:411  and#  G.W. Cran, K.J. Martin and G.E. Thomas (1977).Remark AS R19 and #  Algorithm AS109, A Remark on Algorithms AS 63: The Incomplete Beta Integral#  AS 64: Inverse of the Incomplete Beta Function Ratio, #  Appl. Statist., 26:111-114.## Remarks:#    Complete beta function: B(p,q)=gamma(p)*gamma(q)/gamma(p+q)#                       log(B(p,q))=ln(gamma(p))+ln(gamma(q))-ln(gamma(p+q))##    Incomplete beta function ratio:#                 I_x(p,q)=1/B(p,q) * \int_0^x t^{p-1}*(1-t)^{q-1} dt##    --> log(B(p,q)) has to be supplied to calculate I_x(p,q)#    log denotes the natural logarithm#        $beta = log(B(p,q))#        $x    = x#        $p    = p#        $q    = q#    The subroutine returns I_x(p,q). If an error occurs a negative value #    {-1,-2} is returned. sub betain($$$$){    # Computes incomplete beta function ratio for arguments    # $x between zero and one, $p and $q positive.    # Log of complete beta function, $beta, is assumed to be known.        my ($x, $p, $q, $beta) = @_;    my ($xx, $psq, $cx, $pp, $qq, $index, $betain,         $ns, $term, $ai, $rx, $temp);    my $ACU=1.0E-14;  # accuracy        # tests for admissibility of arguments    if($p<=0 || $q<=0){ return -1;}     if($x<0  || $x>1) { return -2;}    # change tail if necessary and determine s    $psq=$p+$q; $cx=1-$x;    if($p<$psq*$x){ $xx=$cx; $cx=$x; $pp=$q; $qq=$p; $index=1;}    else{ $xx=$x; $pp=$p; $qq=$q; $index=0;}    $term=1; $ai=1; $betain=1;     $ns=$qq+$cx*$psq;        # use Soper's reduction formulae    $rx=$xx/$cx;    do{        if($ns>=0){$temp=$qq-$ai; if($ns==0){$rx=$xx;}}        else{ $temp=$psq; $psq++;}        $term *= $temp*$rx/($pp+$ai);        $betain+=$term;        $temp=abs($term); $ai++; $ns--;}    until($temp<=$ACU && $temp<=$ACU*$betain);        # calculate result    $betain *= exp($pp*log($xx)+($qq-1)*log($cx)-$beta)/$pp;    if($index){ return 1-$betain;}    else{ return $betain;}}        sub Betain($$$){    # Computes the incomplete beta function     # by calling loggamma() and betain()    my ($x, $p, $q) = @_;        if($x==1){return 1;}    elsif($x==0){return 0;}    else{ return betain($x, $p, $q,loggamma($p)+loggamma($q)-loggamma($p+$q));}}sub max($$){    # computes the maximum of two numbers    my ($a, $b) = @_;        if($a>$b){ return $a;}    else{ return $b;}}# Algorithm AS 109,# Computes inverse of incomplete beta function ratio#  G.W. Cran, K.J. Martin and G.E. Thomas (1977).Remark AS R19 and #  Algorithm AS109, A Remark on Algorithms AS 63: The Incomplete Beta Integral#  AS 64: Inverse of the Incomplete Beta Function Ratio, #  Appl. Statist., 26:111-114.##  Remark AS R83 and the correction in vol40(1) of Appl. Statist.(1991), p.236 #  have been incorporated in this version.#  K.J. Berry, P.W. Mielke, Jr and G.W. Cran (1990) Algorithm AS R83, A Remark#  on Algorithm AS 109: Inverse of the Incomplete Beta Function Ratio,#  Appl. Statist., 39:309-310. ## Remarks:# #    Complete beta function: B(p,q)=gamma(p)*gamma(q)/gamma(p+q)#                       log(B(p,q))=ln(gamma(p))+ln(gamma(q))-ln(gamma(p+q))##    Incomplete beta function ratio:#              alpha = I_x(p,q) = 1/B(p,q) * \int_0^x t^{p-1}*(1-t)^{q-1} dt##    --> log(B(p,q)) has to be supplied to calculate I_x(p,q)#    log denotes the natural logarithm#        $beta = log(B(p,q))#        $alpha= I_x(p,q)#        $p    = p#        $q    = q#    The subroutine returns x. If an error occurs a negative value {-1,-2,-3}#    is returned.      sub xinbta($$$$){    # Computes inverse of incomplete beta function ratio    # for given positive values of the arguments $p and $q,    # $alpha between zero and one.    # Log of complete beta function, $beta is assumed to be known.       #    # copyright by H.A. Kestler, 1998    my ($p, $q, $beta, $alpha) = @_;    my ($a, $y, $pp, $qq, $index, $r, $h, $t, $w, $xinbta,        $yprev, $prev,$s, $sq, $tx, $adj, $g);    my $SAE=-37;    my $FPU=10**$SAE;    my $ACU;        # test for admissibility of parameters    if($p<=0 || $q<=0){ return -1;}     if($alpha<0   || $alpha>1) { return -2;}    if($alpha==0  || $alpha==1){ return $alpha;}        # change tail if necessary    if($alpha>.5){ $a=1-$alpha; $pp=$q; $qq=$p; $index=1;}    else{ $a=$alpha; $pp=$p; $qq=$q; $index=0;}        # calculate the initial approximation    $r=sqrt(-log($a*$a));    $y=$r-(2.30753+.27061*$r)/(1+(.99229+.04481*$r)*$r);    if($pp>1 && $qq > 1)    {      $r=($y*$y-3)/6; $s=1/($pp+$pp-1); $t=1/($qq+$qq-1);      $h=2/($s+$t);       $w=$y*sqrt($h+$r)/$h-($t-$s)*($r+5/6-2/(3*$h));      $xinbta=$pp/($pp+$qq*exp($w+$w));    }    else    {      $r=$qq+$qq; $t=1/(9*$qq);      $t=$r*(1-$t+$y*sqrt($t))**3;      if($t<=0){         $xinbta=1-exp((log((1-$a)*$qq)+$beta)/$qq);      }      else{         $t=(4*$pp+$r-2)/$t;        if($t<=1){ $xinbta=exp((log($a*$pp)+$beta)/$pp);}        else{ $xinbta=1-2/($t+1);}      }    }            # solve for $x by a modified newton-raphson method    # using subroutine betain()    $r=1-$pp; $t=1-$qq; $yprev=0; $sq=1; $prev=1;    if($xinbta<.0001){ $xinbta=.0001;}    if($xinbta>.9999){ $xinbta=.9999;}    $ACU=10**(max(-5/$pp**2-1/$a**.2-13,$SAE));    do{        $y=betain($xinbta,$pp,$qq,$beta);        if($y==-1 || $y==-2){ return -3;}  # betain returns an exception        $y=($y-$a)*exp($beta+$r*log($xinbta)+$t*log(1-$xinbta));        if($y*$y<=0){ $prev=max($sq,$FPU);}        $g=1;        Label10: do{                     do{ $adj=$g*$y; $sq=$adj*$adj; $g/=3;} while($sq>=$prev);                     $tx=$xinbta-$adj;}                 until($tx>=0 && $tx<=1);        if($prev<=$ACU || $y*$y<=$ACU){ goto Label12;}        if($tx==0 || $tx==1){ goto Label10;}        $xinbta=$tx; $yprev=$y;}     until($adj==0);          Label12:     if($index){ return 1-$xinbta;}     else{ return $xinbta;}}sub Xinbta($$$){    # Computes the inverse of the incomplete beta function     # by calling loggamma() and xinbta()    #    # copyright by H.A. Kestler, 1998    my ($p, $q, $alpha) = @_;        if($alpha==1){return 1;}    elsif($alpha==0){return 0;}    else{ return xinbta($p, $q,loggamma($p)+loggamma($q)-loggamma($p+$q),                        $alpha);}}sub rank($\@){    # Computes the ranks of the values specified as the second     # argument (an array). Returns a vector of ranks     # corresponding to the input vector.    # Different types of ranking are possible ('high', 'low', 'mean'),    # and are specified as first argument.    # These differ in the way ties of the input vector, i.e. identical    # values, are treated:     #    'high' --> replace ranks of identical values with their    #               highest rank    #    'low'  --> replace ranks of identical values with their    #               lowest rank    #    'mean' --> replace ranks of identical values with the mean    #               of their rank    #    # copyright by H.A. Kestler, 1998        my ($type, $r) = @_; # $type: type of ranking 'high', 'low' or 'mean'                          # $r: reference to array of values to be ranked       my (@s, $s, $i, @e, @rk, $rk_m);        # calculate initial rank's     @s=sort{$$r[$a]<=>$$r[$b]} 0..$#{$r}; # sort idx num. by values of @r     for($i=0,@rk=@s;$i<@rk;$i++){ $rk[$s[$i]]=$i+1;} # set rank's         # treat ties    for($i=1,@e=(); $i<@s; $i++){       if($$r[$s[$i]]==$$r[$s[$i-1]]){ # test if there are ties          push @e,$i-1;}   # save index numbers of tied values (minus 1)       elsif(@e){ # ties have occured and are now being treated          if($type eq'mean'){  # calculate mean value of tied ranks             $rk_m=0;              for(@e,$e[-1]+1){ $rk_m+=$rk[$s[$_]];} $rk_m/=@e+1;          }          for(@e,$e[-1]+1){              if($type    eq 'high'){ $rk[$s[$_]]=$rk[$s[$e[-1]+1]];}              elsif($type eq 'low' ){ $rk[$s[$_]]=$rk[$s[$e[0]]];}              elsif($type eq 'mean'){ $rk[$s[$_]]=$rk_m;}              else{ croak "Wrong type of ranking (high|low|mean).\n";}          }          @e=(); # reinitialize @e       }    }    return @rk;}sub locate(\@$){     # Routine to find the index for table lookup which is below    # the value to be interpolated.    # Given a reference to an array $xx and a value $x a value $j    # is returned such that $x is between $xx[$j] and $xx[$j+1].    # $xx must be monotonic, either increasing or decreasing.    #    # This routine is adapted from "Numerical Recipes in C",    # second edition, by Press, Teukolsky, Vetterling and Flannery,    # Cambridge University Press, 1992.    # It uses bisection to find the right place, which has a    # comutational complexity of O(log_2(n)).    #    # copyright by H.A. Kestler, 1998    my ($xx,$x)=@_;    my ($jl,$ju)=(0,$#{$xx}); # initialize lower and upper limits    my ($jm,$ascend);        $ascend=$$xx[$ju] > $$xx[0];        # test if $x is inside of the array    if(($x>$$xx[$ju] || $x<$$xx[$jl]) && $ascend)       { croak "Value out of range for table lookup (1): $x.\n";}    if(($x<$$xx[$ju] || $x>$$xx[$jl]) && !$ascend)       { croak "Value out of range for table lookup (2): $x.\n";}        while(($ju-$jl)>1) {                  # If we are not yet done          $jm=int(($ju+$jl)/2);           # compute a midpoint,          if($x > $xx->[$jm] == $ascend)             { $jl=$jm;}                   # and replace either the lower limit          else             { $ju=$jm;}                   # or the upper limit, as appropriate.    }    return $jl;}sub linlocate(\@$$){     # Routine to find the index for table lookup which is below    # the value to be interpolated.    # Given a reference to an array $xx and a value $x a value $j    # is returned such that $x is between $xx[$j] and $xx[$j+1].    # $xx must be monotonic, either increasing or decreasing.    #    # Starts searching linearly from an initial index value    # provided as the third argument.    # If no index value can be found a negative value is     # returned, i.e. -1.    #    # copyright by H.A. Kestler, 1998     my ($xx,$x,$index)=@_;     my ($jl,$ju)=(0,$#{$xx}); # initialize lower and upper limits    my $ascend;        $ascend=$$xx[$ju] > $$xx[0];        # test if $x is inside of the array    if(($x>$$xx[$ju] || $x<$$xx[$jl]) && $ascend)       { croak "Value out of range for table lookup.\n";}    if(($x<$$xx[$ju] || $x>$$xx[$jl]) && !$ascend)       { croak "Value out of range for table lookup.\n";}        # step through the table sequentially        if($ascend && $xx->[$index]<$x){     # ascending       while($x>$xx->[$index] and $index<=$ju) { $index++;}}    elsif(!$ascend && $xx->[$index]>$x){ # descending       while($x<$xx->[$index] and $index<=$ju) { $index++;}}    else{ return -1;}    # starting index is too high        return $index-1;}
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -