📄 subset.f90
字号:
PROGRAM subset
! Interactive program for finding best-fitting subsets of variables.
! Author: Alan Miller
! Retired from CSIRO Mathematical & Information Sciences, Melbourne, Australia
! amiller @ bigpond.net.au
! http://www.ozemail.com.au/~milleraj
! http://users.bigpond.net.au/amiller/
! Latest revision - 7 August 2002
! Thanks to Paul Mather, Nottingham, UK for:
! 1. i0 undefined when form_red_file is called.
! 2. Suggesting that END FILE be replaced with CLOSE.
! 3. Several changes to form_red_file to correctly handle the case in which
! a constant is not being fitted.
! Thanks to Wei-Yin Loh, Univ. of Wisconsin, USA for pointing out an error
! in a call to REORDR in the code for cross-validation.
! Other changes:
! 1. In SUBROUTINE update, the INTENT of xmin, xmax, xmean & sxx has been
! changed to IN OUT (previously OUT).
USE lsq
USE find_subsets
IMPLICIT NONE
CHARACTER (LEN = 40) :: fname_dat, fname_red, fname_rpt
CHARACTER (LEN = 1) :: ans, bel = CHAR(7), yesno
CHARACTER (LEN = 10) :: string
INTEGER :: nvar, nvar_max, first, last, ier, i, nsize, pos, j, &
ypos, in, dimc, nv, i0, best_size, line1, nrepl, &
search_method, criterion, iostatus, rank_deficit, ndf
INTEGER, ALLOCATABLE :: list(:), seed(:), order_copy(:)
LOGICAL, ALLOCATABLE :: lindep(:)
REAL (dp) :: one = 1.0, fin, fout, total_sumsq, r2, msep, press, &
y, e, h
REAL (dp), ALLOCATABLE :: cormat(:), ycorr(:), beta(:), x(:), xcopy(:)
CHARACTER (LEN = 8), ALLOCATABLE :: vname(:)
CHARACTER (LEN = 8) :: yname
CHARACTER (LEN = 42) :: version = 'Subset version 1.11, date 9 May 2001'
CHARACTER (LEN = 32) :: method(4) = (/ 'Stepwise (Efroymson) ', &
'Sequential replacement ', &
'2-at-a-time replacement ', &
'Best subsets (exhaustive search)' /)
CHARACTER (LEN = 14) :: crit_name(5) = (/ 'AIC (Akaike) ', 'BIC (Bayesian)',&
'Mallows Cp ', 'Hannan-Quinn ',&
'F-ratio = 4.0 ' /)
LOGICAL :: fit_const, lsel = .false., OK
REAL :: var, Cp, Cp_last, fmax, f1, f5, f10, zero = 0.0
CALL start() ! Read in data and form QR reduction, if necessary
WRITE(*, *)'Enter maximum size of subset to be considered: '
READ(*, *) nvar_max
IF (nvar_max > nvar) THEN
WRITE(*, *)'Sorry, too many. Reset to ', nvar
nvar_max = nvar
END IF
WRITE(11, *)'Max. subset size = ', nvar_max
WRITE(*, *)'How many subsets of each size do you want recorded?: '
READ(*, *) nbest
WRITE(11, *)'No. of best subsets = ', nbest
WRITE(11, 950) (i, vname(i), i=1, nvar)
CALL init_subsets(nvar_max, fit_const)
IF (fit_const) THEN
i0 = 1
total_sumsq = rss(1)
ELSE
i0 = 0
total_sumsq = rss(1) + d(1)*rhs(1)**2
END IF
! Display subset selection menu and ask for choice.
DO
WRITE(*, *)
WRITE(*, '(30x, a)') 'Subset selection menu'
WRITE(*, *) 'C Correlations & partial correlations F Forward selection'
WRITE(*, *) 'E Efroymson stepwise regression B Backward elimination'
WRITE(*, *) 'R Replacement sequentially X Exhaustive search'
WRITE(*, *) '2 Two-at-a-time replacement D Display best found'
WRITE(*, *) 'I Force variables IN O Force variables OUT'
WRITE(*, *) 'L Least-squares regression coeffs. V Show variable names'
WRITE(*, *) 'M Mallows Cp for best subsets P PRESS statistic'
WRITE(*, *) 'S Stochastic + replacement of pairs Q Quit this menu'
WRITE(*, *)
WRITE(*, *) 'N.B. Exhaustive search can be very slow.'
WRITE(*, *) 'Use E, F, B, R and/or 2 first to establish good bounds'
WRITE(*, *)
WRITE(*, *) 'Enter your choice (upper or lower case OK): '
READ(*, *) ans
SELECT CASE (ans)
CASE ('c', 'C') ! Correlations & partial correlations
IF (fit_const) THEN
in = 1
ELSE
in = 0
END IF
nv = 0
WRITE(*, *)'Do you want partial correlations? (Y/N): '
READ(*, *) yesno
IF (yesno .EQ. 'y' .OR. yesno .EQ. 'Y') THEN
WRITE(*, *)'How many variables are to be forced in?: '
READ(*, *) nv
IF (nv > 0) THEN
WRITE(*, 950) (i, vname(i), i=1, nvar)
ALLOCATE(list(nv))
WRITE(*, *)'Enter numbers of variables to be forced in: '
READ(*, *) list
CALL reordr(list, nv, in+1, ier)
in = in + nv
END IF
END IF
dimc = (last-in)*(last-in-1)/2
ALLOCATE(cormat(dimc), ycorr(last))
CALL partial_corr(in, cormat, dimc, ycorr, ier)
CALL print_correlations()
IF (nv > 0) DEALLOCATE(list)
DEALLOCATE(cormat, ycorr)
CASE ('f', 'F') ! Forward selection
CALL forwrd(first, last, ier)
WRITE(*, *) 'Forward selection:'
WRITE(11, *) 'Forward selection:'
IF (fit_const) THEN
WRITE(*, 900) (i, vname(vorder(i+1)), rss(i+1), i=0,nvar_max)
WRITE(11, 900) (i, vname(vorder(i+1)), rss(i+1), i=0,nvar_max)
900 FORMAT(' Order Variable Resid.sum of sq.'/ &
(' ', i3, ' ', a8, ' ', g15.7))
ELSE
WRITE(*, 900) (i, vname(vorder(i)), rss(i), i=1,nvar_max)
WRITE(11, 900) (i, vname(vorder(i)), rss(i), i=1,nvar_max)
END IF
CASE ('e', 'E') ! Efroymson stepwise regression
WRITE(*, *)'Enter F-to-add value (default = 4.0): '
READ(*, '(a)') string
IF (LEN_TRIM(string) .EQ. 0) THEN
fin = 4.0
ELSE
READ(string, *) fin
END IF
WRITE(*, *)'Enter F-to-remove value (default = 4.0): '
READ(*, '(a)') string
IF (LEN_TRIM(string) .EQ. 0) THEN
fout = 4.0
ELSE
READ(string, *) fout
END IF
WRITE(*, 910) fin, fout
WRITE(11, 910) fin, fout
910 FORMAT(' Efroymson stepwise regression algorithm'/ &
' F-to-add = ', f8.2, ' F-to-remove = ', f8.2)
CALL efroym(first, last, fin, fout, nsize, ier, 11)
WRITE(*, *) 'Size =', nsize
WRITE(*, 920) vname(vorder(1:nsize))
WRITE(11, 920) vname(vorder(1:nsize))
920 FORMAT(' Selected variables'/ (' ', 8a9))
WRITE(*, 930) rss(nsize)
WRITE(11, 930) rss(nsize)
930 FORMAT(' Residual sum of squares for this model = ', g15.7/)
CASE ('b', 'B') ! Backward elimination
CALL bakwrd(first, last, ier)
WRITE(*, *) 'Backward elimination:'
WRITE(11, *) 'Backward elimination:'
IF (fit_const) THEN
WRITE(*, 900) (i, vname(vorder(i+1)), rss(i+1), i=0,nvar)
WRITE(11, 900) (i, vname(vorder(i+1)), rss(i+1), i=0,nvar)
ELSE
WRITE(*, 900) (i, vname(vorder(i)), rss(i), i=1,nvar)
WRITE(11, 900) (i, vname(vorder(i)), rss(i), i=1,nvar)
END IF
CASE ('r', 'R') ! Replacement sequentially
CALL seqrep(first, last, ier)
WRITE(*, *) 'Sequential replacement:'
WRITE(11, *) 'Sequential replacement:'
WRITE(*, 920) vname(vorder(1:max_size))
WRITE(11, 920) vname(vorder(1:max_size))
WRITE(*, 930) rss(max_size)
WRITE(11, 930) rss(max_size)
CASE ('x', 'X') ! Exhaustive search
CALL xhaust(first, last, ier)
WRITE(*, *) 'Exhaustive search:'
WRITE(11, *) 'Exhaustive search:'
j = max_size*(max_size-1)/2 + 1
WRITE(*, 920) vname(lopt(j:j+max_size-1, 1))
WRITE(11, 920) vname(lopt(j:j+max_size-1, 1))
WRITE(*, 930) ress(max_size,1)
WRITE(11, 930) ress(max_size,1)
CASE ('2') ! Two-at-a-time replacement
CALL seq2(first, last, ier)
WRITE(*, *) 'Two-at-a-time sequential replacement:'
WRITE(11, *) 'Two-at-a-time sequential replacement:'
WRITE(*, 920) vname(vorder(1:max_size))
WRITE(11, 920) vname(vorder(1:max_size))
WRITE(*, 930) rss(max_size)
WRITE(11, 930) rss(max_size)
CASE ('s', 'S') ! Stochastic + replacement of pairs
WRITE(*, *) 'Random start, two-at-a-time replacement:'
WRITE(11, *) 'Random start, two-at-a-time replacement:'
CALL set_seed()
WRITE(*, '(a)', ADVANCE='NO') ' How many variables in subset? '
READ(*, *) nv
WRITE(11, *) 'No. of variables (excl. constant) = ', nv
WRITE(*, '(a)', ADVANCE='NO') ' How many replications? '
READ(*, *) nrepl
WRITE(11, *) 'No. of replicates = ', nrepl
DO i = 1, nrepl
IF (i == 1) THEN
WRITE(*, '(" ", a, i6)') 'Replicate number ', i
ELSE
WRITE(*, '("+", a, i6, a, g14.6)') &
'Replicate number ', i, ' last RSS =', rss(nv+i0)
END IF
CALL random_pick(first, last, nv+i0)
CALL replace2(first, last, nv+i0)
WRITE(11, '(a, i4, a, g14.6)') &
'Replicate ', nrepl, ' RSS = ', rss(nv+i0)
END DO
CASE ('d', 'D') ! Display best found
DO i = first, max_size
WRITE(*, '(" ", a, i3, a)') 'Best subsets found of', i-i0, ' variables'
WRITE(11, '(" ", a, i3, a)') 'Best subsets found of', i-i0, ' variables'
WRITE(*, *) ' R.S.S. Variable numbers'
WRITE(11, *) ' R.S.S. Variable numbers'
pos = (i-1)*i/2 + 1
DO j = 1, nbest
WRITE(*, 940) ress(i,j), lopt(pos:pos+i-1, j)
WRITE(11, 940) ress(i,j), lopt(pos:pos+i-1, j)
940 FORMAT(' ', g13.5, ' ', 15i4: (/ 16(' '), 15i4))
END DO
END DO
CASE ('i', 'I') ! Force variables IN
CALL current_status()
ALLOCATE( list(nvar) )
CALL get_numbers(list, nv)
ier = 0
IF (nv > 0) THEN
! Check that none of the variables to be forced IN
! is amongst any currently forced OUT.
DO i = last+1, ncol
IF (ANY(list(1:nv) == vorder(i))) THEN
WRITE(*, '(a, i4, a)') ' Variable ', vorder(i), &
' is currently forced OUT'
nv = 0
EXIT
END IF
END DO
IF (fit_const) THEN
CALL reordr(list, nv, 2, ier)
ELSE
CALL reordr(list, nv, 1, ier)
END IF
END IF
IF (ier > 0 .AND. ier /= 4) THEN
WRITE(*, *)'** Error in list of variable numbers **'
WRITE(*, *)'** Variable not found or on list twice **'
ELSE
first = nv + 1
IF (fit_const) first = nv + 2
WRITE(11, *)
WRITE(11, '(75a1)') ('-', i=1,75)
WRITE(11, '(a, i4)') 'No. of variables to be forced in = ', nv
IF (nv > 0) THEN
WRITE(11, *) 'Variables:'
WRITE(11, '(5(i4, 1x, a8, 2x))') (list(i), vname(list(i)), i=1,nv)
END IF
CALL init_subsets(nvar_max, fit_const)
END IF
DEALLOCATE( list )
CASE ('o', 'O') ! Force variables OUT
CALL current_status()
ALLOCATE( list(nvar) )
CALL get_numbers(list, nv)
ier = 0
IF (nv > 0) THEN
! Check that none of the variables to be forced OUT
! is amongst any currently forced IN.
DO i = 1, first-1
IF (ANY(list(1:nv) == vorder(i))) THEN
WRITE(*, '(a, i4, a)') ' Variable ', vorder(i), &
' is currently forced IN'
nv = 0
EXIT
END IF
END DO
DO i = 1, nv ! Lower variable to end positions
DO j = first, ncol-i
IF (vorder(j) == list(i)) THEN
CALL vmove(j, ncol+1-i, ier)
EXIT
END IF
END DO
END DO
END IF
IF (ier > 0 .AND. ier /= 4) THEN
WRITE(*, *)'** Error in list of variable numbers **'
WRITE(*, *)'** Variable not found or on list twice **'
ELSE
last = ncol - nv
WRITE(11, *)
WRITE(11, '(75a1)') ('-', i=1,75)
WRITE(11, '(a, i4)') 'No. of variables to be forced out = ', nv
IF (nv > 0) THEN
WRITE(11, *) 'Variables:'
WRITE(11, '(5(i4, 1x, a8, 2x))') (list(i), vname(list(i)), i=1,nv)
END IF
CALL init_subsets(nvar_max, fit_const)
END IF
DEALLOCATE( list )
CASE ('l', 'L') ! Least-squares regression coeffs. and R^2
WRITE(*, *)'WARNING: These estimates may be seriously biassed'
WRITE(11, *)'WARNING: These estimates may be seriously biassed'
CALL current_status()
ALLOCATE( list(nvar) )
CALL get_numbers(list, nv)
IF (nv > 0) THEN
! If variables are being forced in and/or out,
! this could move them out of position.
! Protect against this by copying the list of
! variables and restoring at the end.
ALLOCATE( order_copy(ncol) )
order_copy = vorder
CALL reordr(list, nv, i0+1, ier)
IF (ier == 0) THEN
IF (fit_const) THEN
ALLOCATE( beta(0:nv) )
ELSE
ALLOCATE( beta(1:nv) )
END IF
CALL regcf(beta, nv+i0, ier)
IF (ier == 0) THEN
WRITE(*, *)'LS regression coefficients'
WRITE(11, *)'LS regression coefficients'
WRITE(*, '(3(1x, a8, 1x, g13.5, " | "))') &
(vname(vorder(i+i0)), beta(i), i=1-i0, nv)
WRITE(11, '(3(a8, 1x, g13.5, " | "))') &
(vname(vorder(i+i0)), beta(i), i=1-i0, nv)
r2 = one - rss(nv+i0) / total_sumsq
WRITE(*, '(1x, a, f8.4)') 'R^2 for this model = ', r2
WRITE(11, '(1x, a, f8.4)') 'R^2 for this model = ', r2
END IF
DEALLOCATE( beta )
END IF
IF (first > i0+1) CALL reordr(order_copy, first-1, 1, ier)
IF (last < ncol) CALL reordr(order_copy, last, 1, ier)
DEALLOCATE( order_copy )
END IF
DEALLOCATE( list )
CASE ('v', 'V') ! Show variable names
WRITE(*, 950) (i, vname(i), i=1, nvar)
950 FORMAT(6(' ', i3, ' ', a8))
CASE ('m', 'M') ! Mallows Cp for best subsets
IF (ncol >= nobs) THEN
WRITE(*, *) 'No degrees of freedom available for residual variance'
CYCLE
END IF
var = sserr / (nobs - ncol)
best_size = 1
WRITE(*, *) 'Mallows Cp - with Gilmours correction'
WRITE(11, *) 'Mallows Cp - with Gilmours correction'
IF (fit_const) THEN
WRITE(*, *) 'N.B. The no. of variables INCLUDES the constant term'
WRITE(11, *) 'N.B. The no. of variables INCLUDES the constant term'
END IF
DO i = first, max_size
Cp = ress(i,1)/var - nobs + 2*i - 2*(ncol-1)/DBLE(nobs-ncol-2)
WRITE(*, '(" ", i6, " ", f10.2)') i, Cp
WRITE(11, '(i6, " ", f10.2)') i, Cp
IF (i > first) THEN
fmax = Cp_last - Cp + 2*(nobs-ncol-1)/DBLE(nobs-ncol-2)
IF (fmax > zero) THEN
CALL f1max(nobs-ncol, ncol+1-i, f1, f5, f10, ier)
IF (fmax > f1) THEN
WRITE(*, '(20x, a)') 'Reduction significant at the 1% level'
WRITE(11, '(20x, a)') 'Reduction significant at the 1% level'
best_size = i
ELSE IF (fmax > f5) THEN
WRITE(*, '(20(" "), a)') 'Reduction significant at the 5% level'
WRITE(11, '(20(" "), a)') 'Reduction significant at the 5% level'
best_size = i
ELSE IF (fmax > f10) THEN
WRITE(*, '(20(" "), a)') 'Reduction significant at the 10% level'
WRITE(11, '(20(" "), a)') 'Reduction significant at the 10% level'
ELSE
WRITE(*, '(20(" "), a)') 'Reduction is not significant'
WRITE(11, '(20(" "), a)') 'Reduction is not significant'
END IF
END IF
END IF
Cp_last = Cp
END DO
pos = best_size * (best_size - 1) / 2
WRITE(*, *) 'A good subset for prediction appears to be:'
WRITE(*, '(5(1x, i3, 1x, a8, " |"))') &
(lopt(pos+i,1), vname(lopt(pos+i,1)), i=1, best_size)
WRITE(11, *) 'A good subset for prediction appears to be:'
WRITE(11, '(5(1x, i3, 1x, a8, " |"))') &
(lopt(pos+i,1), vname(lopt(pos+i,1)), i=1, best_size)
WRITE(11, *)
CASE ('p', 'P') ! Calculate the PRESS statistic for best subsets
WRITE(*, *) 'PRESS statistic for best-fitting subsets'
WRITE(11, *) 'PRESS statistic for best-fitting subsets'
WRITE(*, *) 'Size PRESS statistic Mean sq. error Variable nos.'
WRITE(11, *) 'Size PRESS statistic Mean sq. error Variable nos.'
INQUIRE(10, OPENED=OK)
IF (.NOT. OK) OPEN(10, FILE=fname_dat, STATUS='OLD')
ALLOCATE( x(0:nvar), beta(ncol), xcopy(ncol) )
x(0) = one
DO i = first, max_size
pos = i*(i-1)/2 + 1
CALL reordr(lopt(pos:,1), i, 1, ier)
CALL regcf(beta, i, ier)
REWIND (10)
DO j = 1, line1-1 ! Skip to line1 in data file
READ(10, *)
END DO
press = zero
DO
IF (ypos > nvar) THEN
READ(10, *, IOSTAT=iostatus) x(1:nvar), y
ELSE IF (ypos .EQ. 1) THEN
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -