⭐ 欢迎来到虫虫下载站! | 📦 资源下载 📁 资源专辑 ℹ️ 关于我们
⭐ 虫虫下载站

📄 subset.f90

📁 The module LSQ is for unconstrained linear least-squares fitting. It is based upon Applied Statisti
💻 F90
📖 第 1 页 / 共 4 页
字号:
PROGRAM subset

!  Interactive program for finding best-fitting subsets of variables.
!  Author:  Alan Miller
!  Retired from CSIRO Mathematical & Information Sciences, Melbourne, Australia
!  amiller @ bigpond.net.au
!  http://www.ozemail.com.au/~milleraj
!  http://users.bigpond.net.au/amiller/

!  Latest revision - 7 August 2002
!  Thanks to Paul Mather, Nottingham, UK for:
!  1. i0 undefined when form_red_file is called.
!  2. Suggesting that END FILE be replaced with CLOSE.
!  3. Several changes to form_red_file to correctly handle the case in which
!     a constant is not being fitted.
!  Thanks to Wei-Yin Loh, Univ. of Wisconsin, USA for pointing out an error
!  in a call to REORDR in the code for cross-validation.
!  Other changes:
!  1. In SUBROUTINE update, the INTENT of xmin, xmax, xmean & sxx has been
!     changed to IN OUT (previously OUT).

USE lsq
USE find_subsets

IMPLICIT NONE

CHARACTER (LEN = 40) :: fname_dat, fname_red, fname_rpt
CHARACTER (LEN = 1)  :: ans, bel = CHAR(7), yesno
CHARACTER (LEN = 10) :: string
INTEGER              :: nvar, nvar_max, first, last, ier, i, nsize, pos, j,  &
                        ypos, in, dimc, nv, i0, best_size, line1, nrepl,    &
                        search_method, criterion, iostatus, rank_deficit, ndf
INTEGER, ALLOCATABLE :: list(:), seed(:), order_copy(:)
LOGICAL, ALLOCATABLE :: lindep(:)

REAL (dp)            :: one = 1.0, fin, fout, total_sumsq, r2, msep, press, &
                        y, e, h
REAL (dp), ALLOCATABLE  :: cormat(:), ycorr(:), beta(:), x(:), xcopy(:)
CHARACTER (LEN = 8), ALLOCATABLE :: vname(:)
CHARACTER (LEN = 8)  :: yname
CHARACTER (LEN = 42) :: version = 'Subset version 1.11, date 9 May 2001'
CHARACTER (LEN = 32) :: method(4) = (/ 'Stepwise (Efroymson)            ',  &
                                       'Sequential replacement          ',  &
                                       '2-at-a-time replacement         ',  &
                                       'Best subsets (exhaustive search)' /)
CHARACTER (LEN = 14) :: crit_name(5) = (/ 'AIC (Akaike)  ', 'BIC (Bayesian)',&
                                          'Mallows Cp    ', 'Hannan-Quinn  ',&
                                          'F-ratio = 4.0 ' /)
LOGICAL              :: fit_const, lsel = .false., OK
REAL                 :: var, Cp, Cp_last, fmax, f1, f5, f10, zero = 0.0

CALL start()                 ! Read in data and form QR reduction, if necessary

WRITE(*, *)'Enter maximum size of subset to be considered: '
READ(*, *) nvar_max
IF (nvar_max > nvar) THEN
  WRITE(*, *)'Sorry, too many.   Reset to ', nvar
  nvar_max = nvar
END IF
WRITE(11, *)'Max. subset size = ', nvar_max
WRITE(*, *)'How many subsets of each size do you want recorded?: '
READ(*, *) nbest
WRITE(11, *)'No. of best subsets = ', nbest
WRITE(11, 950) (i, vname(i), i=1, nvar)

CALL init_subsets(nvar_max, fit_const)
IF (fit_const) THEN
  i0 = 1
  total_sumsq = rss(1)
ELSE
  i0 = 0
  total_sumsq = rss(1) + d(1)*rhs(1)**2
END IF

!     Display subset selection menu and ask for choice.

DO
  WRITE(*, *)
  WRITE(*, '(30x, a)') 'Subset selection menu'
  WRITE(*, *) 'C Correlations & partial correlations    F Forward selection'
  WRITE(*, *) 'E Efroymson stepwise regression          B Backward elimination'
  WRITE(*, *) 'R Replacement sequentially               X Exhaustive search'
  WRITE(*, *) '2 Two-at-a-time replacement              D Display best found'
  WRITE(*, *) 'I Force variables IN                     O Force variables OUT'
  WRITE(*, *) 'L Least-squares regression coeffs.       V Show variable names'
  WRITE(*, *) 'M Mallows Cp for best subsets            P PRESS statistic'
  WRITE(*, *) 'S Stochastic + replacement of pairs      Q Quit this menu'
  WRITE(*, *)
  WRITE(*, *) 'N.B. Exhaustive search can be very slow.'
  WRITE(*, *) 'Use E, F, B, R and/or 2 first to establish good bounds'
  WRITE(*, *)
  WRITE(*, *) 'Enter your choice (upper or lower case OK): '
  READ(*, *) ans

  SELECT CASE (ans)
    CASE ('c', 'C')          ! Correlations & partial correlations
      IF (fit_const) THEN
        in = 1
      ELSE
        in = 0
      END IF
      nv = 0
      WRITE(*, *)'Do you want partial correlations? (Y/N): '
      READ(*, *) yesno
      IF (yesno .EQ. 'y' .OR. yesno .EQ. 'Y') THEN
        WRITE(*, *)'How many variables are to be forced in?: '
        READ(*, *) nv
        IF (nv > 0) THEN
          WRITE(*, 950) (i, vname(i), i=1, nvar)
          ALLOCATE(list(nv))
          WRITE(*, *)'Enter numbers of variables to be forced in: '
          READ(*, *) list
          CALL reordr(list, nv, in+1, ier)
          in = in + nv
        END IF
      END IF
      dimc = (last-in)*(last-in-1)/2
      ALLOCATE(cormat(dimc), ycorr(last))
      CALL partial_corr(in, cormat, dimc, ycorr, ier)
      CALL print_correlations()

      IF (nv > 0) DEALLOCATE(list)
      DEALLOCATE(cormat, ycorr)

    CASE ('f', 'F')          ! Forward selection
      CALL forwrd(first, last, ier)
      WRITE(*, *) 'Forward selection:'
      WRITE(11, *) 'Forward selection:'
      IF (fit_const) THEN
        WRITE(*, 900) (i, vname(vorder(i+1)), rss(i+1), i=0,nvar_max)
        WRITE(11, 900) (i, vname(vorder(i+1)), rss(i+1), i=0,nvar_max)
        900 FORMAT('     Order  Variable  Resid.sum of sq.'/      &
                   ('       ', i3, '  ', a8, ' ', g15.7))
      ELSE
        WRITE(*, 900) (i, vname(vorder(i)), rss(i), i=1,nvar_max)
        WRITE(11, 900) (i, vname(vorder(i)), rss(i), i=1,nvar_max)
      END IF

    CASE ('e', 'E')          ! Efroymson stepwise regression
      WRITE(*, *)'Enter F-to-add value (default = 4.0): '
      READ(*, '(a)') string
      IF (LEN_TRIM(string) .EQ. 0) THEN
        fin = 4.0
      ELSE
        READ(string, *) fin
      END IF

      WRITE(*, *)'Enter F-to-remove value (default = 4.0): '
      READ(*, '(a)') string
      IF (LEN_TRIM(string) .EQ. 0) THEN
        fout = 4.0
      ELSE
        READ(string, *) fout
      END IF
      WRITE(*, 910) fin, fout
      WRITE(11, 910) fin, fout
      910 FORMAT(' Efroymson stepwise regression algorithm'/        &
                 '     F-to-add = ', f8.2, '   F-to-remove = ', f8.2)

      CALL efroym(first, last, fin, fout, nsize, ier, 11)
      WRITE(*, *) 'Size =', nsize
      WRITE(*, 920) vname(vorder(1:nsize))
      WRITE(11, 920) vname(vorder(1:nsize))
      920 FORMAT(' Selected variables'/ (' ', 8a9))
      WRITE(*, 930) rss(nsize)
      WRITE(11, 930) rss(nsize)
      930 FORMAT(' Residual sum of squares for this model = ', g15.7/)

    CASE ('b', 'B')          ! Backward elimination
      CALL bakwrd(first, last, ier)
      WRITE(*, *) 'Backward elimination:'
      WRITE(11, *) 'Backward elimination:'
      IF (fit_const) THEN
        WRITE(*, 900) (i, vname(vorder(i+1)), rss(i+1), i=0,nvar)
        WRITE(11, 900) (i, vname(vorder(i+1)), rss(i+1), i=0,nvar)
      ELSE
        WRITE(*, 900) (i, vname(vorder(i)), rss(i), i=1,nvar)
        WRITE(11, 900) (i, vname(vorder(i)), rss(i), i=1,nvar)
      END IF

    CASE ('r', 'R')          ! Replacement sequentially
      CALL seqrep(first, last, ier)
      WRITE(*, *) 'Sequential replacement:'
      WRITE(11, *) 'Sequential replacement:'
      WRITE(*, 920) vname(vorder(1:max_size))
      WRITE(11, 920) vname(vorder(1:max_size))
      WRITE(*, 930) rss(max_size)
      WRITE(11, 930) rss(max_size)

    CASE ('x', 'X')          ! Exhaustive search
      CALL xhaust(first, last, ier)
      WRITE(*, *) 'Exhaustive search:'
      WRITE(11, *) 'Exhaustive search:'
      j = max_size*(max_size-1)/2 + 1
      WRITE(*, 920) vname(lopt(j:j+max_size-1, 1))
      WRITE(11, 920) vname(lopt(j:j+max_size-1, 1))
      WRITE(*, 930) ress(max_size,1)
      WRITE(11, 930) ress(max_size,1)

    CASE ('2')               ! Two-at-a-time replacement
      CALL seq2(first, last, ier)
      WRITE(*, *) 'Two-at-a-time sequential replacement:'
      WRITE(11, *) 'Two-at-a-time sequential replacement:'
      WRITE(*, 920) vname(vorder(1:max_size))
      WRITE(11, 920) vname(vorder(1:max_size))
      WRITE(*, 930) rss(max_size)
      WRITE(11, 930) rss(max_size)

    CASE ('s', 'S')          ! Stochastic + replacement of pairs
      WRITE(*, *) 'Random start, two-at-a-time replacement:'
      WRITE(11, *) 'Random start, two-at-a-time replacement:'
      CALL set_seed()
      WRITE(*, '(a)', ADVANCE='NO') ' How many variables in subset? '
      READ(*, *) nv
      WRITE(11, *) 'No. of variables (excl. constant) = ', nv
      WRITE(*, '(a)', ADVANCE='NO') ' How many replications? '
      READ(*, *) nrepl
      WRITE(11, *) 'No. of replicates = ', nrepl
      DO i = 1, nrepl
        IF (i == 1) THEN
          WRITE(*, '(" ", a, i6)') 'Replicate number ', i
        ELSE
          WRITE(*, '("+", a, i6, a, g14.6)')  &
                'Replicate number ', i, '  last RSS =', rss(nv+i0)
        END IF
        CALL random_pick(first, last, nv+i0)
        CALL replace2(first, last, nv+i0)
        WRITE(11, '(a, i4, a, g14.6)')  &
                  'Replicate ', nrepl, '  RSS = ', rss(nv+i0)
      END DO

    CASE ('d', 'D')          ! Display best found
      DO i = first, max_size
        WRITE(*, '(" ", a, i3, a)') 'Best subsets found of', i-i0, ' variables'
        WRITE(11, '(" ", a, i3, a)') 'Best subsets found of', i-i0, ' variables'
        WRITE(*, *) '   R.S.S.     Variable numbers'
        WRITE(11, *) '   R.S.S.     Variable numbers'
        pos = (i-1)*i/2 + 1
        DO j = 1, nbest
          WRITE(*, 940) ress(i,j), lopt(pos:pos+i-1, j)
          WRITE(11, 940) ress(i,j), lopt(pos:pos+i-1, j)
          940 FORMAT(' ', g13.5, '  ', 15i4: (/ 16(' '), 15i4))
        END DO
      END DO

    CASE ('i', 'I')          ! Force variables IN
      CALL current_status()
      ALLOCATE( list(nvar) )
      CALL get_numbers(list, nv)
      ier = 0
      IF (nv > 0) THEN
                             ! Check that none of the variables to be forced IN
                             ! is amongst any currently forced OUT.
        DO i = last+1, ncol
          IF (ANY(list(1:nv) == vorder(i))) THEN
            WRITE(*, '(a, i4, a)') ' Variable ', vorder(i),  &
                                   ' is currently forced OUT'
            nv = 0
            EXIT
          END IF
        END DO
        IF (fit_const) THEN
          CALL reordr(list, nv, 2, ier)
        ELSE
          CALL reordr(list, nv, 1, ier)
        END IF
      END IF
      IF (ier > 0 .AND. ier /= 4) THEN
        WRITE(*, *)'** Error in list of variable numbers **'
        WRITE(*, *)'** Variable not found or on list twice **'
      ELSE
        first = nv + 1
        IF (fit_const) first = nv + 2
        WRITE(11, *)
        WRITE(11, '(75a1)') ('-', i=1,75)
        WRITE(11, '(a, i4)') 'No. of variables to be forced in = ', nv
        IF (nv > 0) THEN
          WRITE(11, *) 'Variables:'
          WRITE(11, '(5(i4, 1x, a8, 2x))') (list(i), vname(list(i)), i=1,nv)
        END IF
        CALL init_subsets(nvar_max, fit_const)
      END IF
      DEALLOCATE( list )

    CASE ('o', 'O')          ! Force variables OUT
      CALL current_status()
      ALLOCATE( list(nvar) )
      CALL get_numbers(list, nv)
      ier = 0
      IF (nv > 0) THEN
                             ! Check that none of the variables to be forced OUT
                             ! is amongst any currently forced IN.
        DO i = 1, first-1
          IF (ANY(list(1:nv) == vorder(i))) THEN
            WRITE(*, '(a, i4, a)') ' Variable ', vorder(i),  &
                                   ' is currently forced IN'
            nv = 0
            EXIT
          END IF
        END DO
        DO i = 1, nv                   ! Lower variable to end positions
          DO j = first, ncol-i
            IF (vorder(j) == list(i)) THEN
              CALL vmove(j, ncol+1-i, ier)
              EXIT
            END IF
          END DO
        END DO
      END IF
      IF (ier > 0 .AND. ier /= 4) THEN
        WRITE(*, *)'** Error in list of variable numbers **'
        WRITE(*, *)'** Variable not found or on list twice **'
      ELSE
        last = ncol - nv
        WRITE(11, *)
        WRITE(11, '(75a1)') ('-', i=1,75)
        WRITE(11, '(a, i4)') 'No. of variables to be forced out = ', nv
        IF (nv > 0) THEN
          WRITE(11, *) 'Variables:'
          WRITE(11, '(5(i4, 1x, a8, 2x))') (list(i), vname(list(i)), i=1,nv)
        END IF
        CALL init_subsets(nvar_max, fit_const)
      END IF
      DEALLOCATE( list )

    CASE ('l', 'L')          ! Least-squares regression coeffs. and R^2
      WRITE(*, *)'WARNING: These estimates may be seriously biassed'
      WRITE(11, *)'WARNING: These estimates may be seriously biassed'
      CALL current_status()
      ALLOCATE( list(nvar) )
      CALL get_numbers(list, nv)
      IF (nv > 0) THEN
                             ! If variables are being forced in and/or out,
                             ! this could move them out of position.
                             ! Protect against this by copying the list of
                             ! variables and restoring at the end.
        ALLOCATE( order_copy(ncol) )
        order_copy = vorder
        CALL reordr(list, nv, i0+1, ier)
        IF (ier == 0) THEN
          IF (fit_const) THEN
            ALLOCATE( beta(0:nv) )
          ELSE
            ALLOCATE( beta(1:nv) )
          END IF
          CALL regcf(beta, nv+i0, ier)
          IF (ier == 0) THEN
            WRITE(*, *)'LS regression coefficients'
            WRITE(11, *)'LS regression coefficients'
            WRITE(*, '(3(1x, a8, 1x, g13.5, " | "))')   &
                          (vname(vorder(i+i0)), beta(i), i=1-i0, nv)
            WRITE(11, '(3(a8, 1x, g13.5, " |  "))')      &
                          (vname(vorder(i+i0)), beta(i), i=1-i0, nv)
            r2 = one - rss(nv+i0) / total_sumsq
            WRITE(*, '(1x, a, f8.4)') 'R^2 for this model = ', r2
            WRITE(11, '(1x, a, f8.4)') 'R^2 for this model = ', r2
          END IF
          DEALLOCATE( beta )
        END IF
        IF (first > i0+1) CALL reordr(order_copy, first-1, 1, ier)
        IF (last < ncol) CALL reordr(order_copy, last, 1, ier)
        DEALLOCATE( order_copy )
      END IF
      DEALLOCATE( list )

    CASE ('v', 'V')          ! Show variable names
      WRITE(*, 950) (i, vname(i), i=1, nvar)
      950 FORMAT(6(' ', i3, ' ', a8))

    CASE ('m', 'M')          ! Mallows Cp for best subsets
      IF (ncol >= nobs) THEN
        WRITE(*, *) 'No degrees of freedom available for residual variance'
        CYCLE
      END IF

      var = sserr / (nobs - ncol)
      best_size = 1
      WRITE(*, *) 'Mallows Cp - with Gilmours correction'
      WRITE(11, *) 'Mallows Cp - with Gilmours correction'
      IF (fit_const) THEN
        WRITE(*, *) 'N.B. The no. of variables INCLUDES the constant term'
        WRITE(11, *) 'N.B. The no. of variables INCLUDES the constant term'
      END IF
      DO i = first, max_size
        Cp = ress(i,1)/var - nobs + 2*i - 2*(ncol-1)/DBLE(nobs-ncol-2)
        WRITE(*, '(" ", i6, "     ", f10.2)') i, Cp
        WRITE(11, '(i6, "     ", f10.2)') i, Cp
        IF (i > first) THEN
          fmax = Cp_last - Cp + 2*(nobs-ncol-1)/DBLE(nobs-ncol-2)
          IF (fmax > zero) THEN
            CALL f1max(nobs-ncol, ncol+1-i, f1, f5, f10, ier)
            IF (fmax > f1) THEN
              WRITE(*, '(20x, a)') 'Reduction significant at the 1% level'
              WRITE(11, '(20x, a)') 'Reduction significant at the 1% level'
              best_size = i
            ELSE IF (fmax > f5) THEN
              WRITE(*, '(20(" "), a)') 'Reduction significant at the 5% level'
              WRITE(11, '(20(" "), a)') 'Reduction significant at the 5% level'
              best_size = i
            ELSE IF (fmax > f10) THEN
              WRITE(*, '(20(" "), a)') 'Reduction significant at the 10% level'
              WRITE(11, '(20(" "), a)') 'Reduction significant at the 10% level'
            ELSE
              WRITE(*, '(20(" "), a)') 'Reduction is not significant'
              WRITE(11, '(20(" "), a)') 'Reduction is not significant'
            END IF
          END IF
        END IF
        Cp_last = Cp
      END DO
      pos = best_size * (best_size - 1) / 2
      WRITE(*, *) 'A good subset for prediction appears to be:'
      WRITE(*, '(5(1x, i3, 1x, a8, " |"))')  &
                   (lopt(pos+i,1), vname(lopt(pos+i,1)), i=1, best_size)
      WRITE(11, *) 'A good subset for prediction appears to be:'
      WRITE(11, '(5(1x, i3, 1x, a8, " |"))')  &
                    (lopt(pos+i,1), vname(lopt(pos+i,1)), i=1, best_size)
      WRITE(11, *)

    CASE ('p', 'P')          ! Calculate the PRESS statistic for best subsets
      WRITE(*, *) 'PRESS statistic for best-fitting subsets'
      WRITE(11, *) 'PRESS statistic for best-fitting subsets'
      WRITE(*, *) 'Size  PRESS statistic  Mean sq. error  Variable nos.'
      WRITE(11, *) 'Size  PRESS statistic  Mean sq. error  Variable nos.'
      INQUIRE(10, OPENED=OK)
      IF (.NOT. OK) OPEN(10, FILE=fname_dat, STATUS='OLD')
      ALLOCATE( x(0:nvar), beta(ncol), xcopy(ncol) )
      x(0) = one

      DO i = first, max_size
        pos = i*(i-1)/2 + 1
        CALL reordr(lopt(pos:,1), i, 1, ier)
        CALL regcf(beta, i, ier)
        REWIND (10)
        DO j = 1, line1-1              ! Skip to line1 in data file
          READ(10, *)
        END DO
        press = zero
        DO
          IF (ypos > nvar) THEN
            READ(10, *, IOSTAT=iostatus) x(1:nvar), y
          ELSE IF (ypos .EQ. 1) THEN

⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -