📄 selector.r

📁 r软件另一款可以计算核估计的软件包需安装r软件
💻 R
📖 第 1 页 / 共 3 页
字号:
## nstage - number of plug-in stages (1 or 2)## pilot - "amse" - different AMSE pilot##       - "samse" - SAMSE pilot## pre - "scale" - pre-scaled data##     - "sphere"- pre-sphered data #### Returns## matrix of psi functionals############################################################################psimat.2d <- function(x.star, nstage=1, pilot="samse", binned, bin.par){  if (nstage==1)    psi.fun <- psifun1.2d(x.star, pilot=pilot, binned=binned, bin.par=bin.par)  else if (nstage==2)    psi.fun <- psifun2.2d(x.star, pilot=pilot, binned=binned, bin.par=bin.par)  psi40 <- psi.fun[1]   psi31 <- psi.fun[2]   psi22 <- psi.fun[3]   psi13 <- psi.fun[4]   psi04 <- psi.fun[5]   coeff <- c(1, 2, 1, 2, 4, 2, 1, 2, 1)  psi.fun <- c(psi40, psi31, psi22, psi31, psi22, psi13, psi22, psi13, psi04)  return(matrix(coeff * psi.fun, nc=3, nr=3))}psimat.nd <- function(x.star, d, nstage=1, pilot="samse", binned, bin.par){    if (nstage==1)    psi.fun <- psifun1.nd(x.star, d=d, pilot=pilot, binned=binned, bin.par=bin.par)  else if (nstage==2)    psi.fun <- psifun2.nd(x.star, d=d, pilot=pilot, binned=binned, bin.par=bin.par)  coeff <- Psi4.list(d)$coeff    return(matrix(coeff * psi.fun, nc=d*(d+1)/2, nr=d*(d+1)/2))}###  unconstrained pilot selectorspsimat.unconstr.nd <- function(x, nstage=1, Sd4, Sd6){  if (nstage==1)    psi.fun <- psifun1.unconstr.nd(x=x, Sd4=Sd4, Sd6=Sd6)  else if (nstage==2)    psi.fun <- psifun2.unconstr.nd(x=x, Sd4=Sd4, Sd6=Sd6)  return(invvec(psi.fun))}  ############################################################################## Plug-in bandwidth selectors#############################################################################    ############################################################################## Computes plug-in full bandwidth matrix - 2 to 6 dim#### Parameters## x - data points## Hstart - initial value for minimisation## nstage - number of plug-in stages (1 or 2)## pilot - "amse" - different AMSE pilot##       - "samse" - SAMSE pilot##       - "unconstr" - unconstrained pilot## pre - "scale" - pre-scaled data##     - "sphere"- pre-sphered data #### Returns## Plug-in full bandwidth matrix###############################################################################hpi <- function(x, nstage=2, binned=TRUE, bgridsize){  ## 1-d selector is taken from KernSmooth's dpik    if (missing(bgridsize)) bgridsize <- 401  return(dpik(x=x, level=nstage, gridsize=bgridsize))}Hpi <- function(x, nstage=2, pilot="samse", pre="sphere", Hstart, binned=FALSE, bgridsize, amise=FALSE){  n <- nrow(x)  d <- ncol(x)  RK <- (4*pi)^(-d/2)  if(!is.matrix(x)) x <- as.matrix(x)  if (substr(pre,1,2)=="sc")  {    x.star <- pre.scale(x)    S12 <- diag(sqrt(diag(var(x))))    Sinv12 <- chol2inv(chol(S12))  }  else if (substr(pre,1,2)=="sp")  {    x.star <- pre.sphere(x)    S12 <- matrix.sqrt(var(x))    Sinv12 <- chol2inv(chol(S12))  }    if (substr(pilot,1,1)=="a")    pilot <- "amse"  else if (substr(pilot,1,1)=="s")    pilot <- "samse"  else if (substr(pilot,1,1)=="u")    pilot <- "unconstr"               if (pilot=="amse" & d>2)    stop("SAMSE pilot selectors are better for higher dimensions")  if (pilot=="unconstr" & d>=6)    stop("Uconstrained pilots not implemented yet for 6-dim data")    if (missing(bgridsize) & binned) bgridsize <- default.gridsize(d)  if (d > 4) binned <- FALSE    if (binned)  {    ## linear binning    H.max <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x.star)    ##bin.par <- binning(x=x.star, bgridsize=bgridsize, H=sqrt(diag(H.max)))    ## for large samples, take subset for pilot estimation    nsub <- min(n, 1e4)    x.star.sub <- x.star[1:nsub,]    bin.par.sub <- binning(x=x.star.sub, bgridsize=bgridsize, H=sqrt(diag(H.max)))   }  else     x.star.sub <- x.star   ## psi4.mat is on pre-transformed data scale  if (pilot!="unconstr")  {    if (d==2)      psi4.mat <- psimat.2d(x.star.sub, nstage=nstage, pilot=pilot, binned=binned, bin.par=bin.par.sub)    else       psi4.mat <- psimat.nd(x.star.sub, d=d, nstage=nstage, pilot=pilot, binned=binned, bin.par=bin.par.sub)  }  else  {    ### symmetriser matrices for unconstrained pilot selectors    ##Sd2 <- Sdr(d=d, r=2)    Sd4 <- Sdr(d=d, r=4)    Sd6 <- Sdr(d=d, r=6)    psi4.mat <- psimat.unconstr.nd(x=x, nstage=nstage, Sd4=Sd4, Sd6=Sd6)  }  if (pilot=="unconstr")  {    ## use normal reference bandwidth as initial condition     if (missing(Hstart))       Hstart <- (4/(n*(d + 2)))^(2/(d + 4)) * var(x)        Hstart <- matrix.sqrt(Hstart)    ## PI is estimate of AMISE    pi.temp.unconstr <- function(vechH)    {       H <- invvech(vechH) %*% invvech(vechH)      pi.temp <- 1/(det(H)^(1/2)*n)*RK + 1/4* t(vec(H)) %*% psi4.mat %*% vec(H)      return(drop(pi.temp))    }    ## psi4.mat always a zero eigen-value since it has repeated rows     result <- optim(vech(Hstart), pi.temp.unconstr, method="BFGS")    H <- invvech(result$par) %*% invvech(result$par)  }  else if (pilot!="unconstr")  {    ## use normal reference bandwidth as initial condition     if (missing(Hstart))       Hstart <- (4/(n*(d + 2)))^(2/(d + 4)) * var(x.star)    else      Hstart <- Sinv12 %*% Hstart %*% Sinv12        Hstart <- matrix.sqrt(Hstart)    ## PI is estimate of AMISE    pi.temp <- function(vechH)    {       H <- invvech(vechH) %*% invvech(vechH)      pi.temp <- 1/(det(H)^(1/2)*n)*RK + 1/4* t(vech(H)) %*% psi4.mat %*% vech(H)      return(drop(pi.temp))     }        ## check that Psi_4 is positive definite      if (prod(eigen(psi4.mat)$val > 0) == 1)    {      result <- optim(vech(Hstart), pi.temp, method="BFGS")      H <- invvech(result$par) %*% invvech(result$par)    }    else    {       cat("Psi matrix not positive definite\n")      H <- matrix(NA, nc=d, nr=d)     }        ## back-transform    H <- S12 %*% H %*% S12  }    if (!amise)    return(H)  else    return(list(H = H, PI=result$value))}     ################################################################################ Computes plug-in diagonal bandwidth matrix for 2 to 6-dim## Parameters# x - data points# nstage - number of plug-in stages (1 or 2)# pre - "scale" - pre-scaled data#     - "sphere"- pre-sphered data ## Returns# Plug-in diagonal bandwidth matrix###############################################################################Hpi.diag <- function(x, nstage=2, pilot="amse", pre="scale", Hstart, binned=FALSE, bgridsize){  if(!is.matrix(x)) x <- as.matrix(x)    if (substr(pre,1,2)=="sc")    x.star <- pre.scale(x)  else if (substr(pre,1,2)=="sp")    x.star <- pre.sphere(x)  if (substr(pre,1,2)=="sp")    stop("Using pre-sphering won't give diagonal bandwidth matrix\n")  if (substr(pilot,1,1)=="a")    pilot <- "amse"  else if (substr(pilot,1,1)=="s")    pilot <- "samse"  n <- nrow(x)  d <- ncol(x)  RK <- (4*pi)^(-d/2)  s1 <- sd(x[,1])  s2 <- sd(x[,2])  if (substr(pre,1,2)=="sc") S12 <- diag(sqrt(diag(var(x))))  else if (substr(pre,1,2)=="sp") S12 <- matrix.sqrt(var(x))  Sinv12 <- chol2inv(chol(S12))      if (missing(bgridsize) & binned) bgridsize <- default.gridsize(d)  if (d > 4) binned <- FALSE    if (binned)  {    H.max <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x.star)    ##bin.par <- binning(x.star, bgridsize, sqrt(diag(H.max)))    ## for large samples, take subset for pilot estimation    nsub <- min(n, 1e4)    x.star.sub <- x.star[sample(1:n, size=nsub),]    bin.par.sub <- binning(x=x.star.sub, bgridsize=bgridsize, H=sqrt(diag(H.max)))   }  else    x.star.sub <- x.star    if (d==2)  {    if (nstage == 1)      psi.fun <- psifun1.2d(x.star.sub, pilot=pilot, binned=binned, bin.par=bin.par.sub)    else if (nstage == 2)      psi.fun <- psifun2.2d(x.star.sub, pilot=pilot, binned=binned, bin.par=bin.par.sub)        psi40 <- psi.fun[1]    psi22 <- psi.fun[3]    psi04 <- psi.fun[5]        ## diagonal bandwidth matrix for 2-dim has exact formula     h1 <- (psi04^(3/4)*RK/(psi40^(3/4)*(sqrt(psi40 * psi04)+psi22)*n))^(1/6)    h2 <- (psi40/psi04)^(1/4) * h1    return(diag(c(s1^2*h1^2, s2^2*h2^2)))  }  else   {     if (pilot=="amse")      stop("SAMSE pilot selectors are better for higher dimensions")     ## use normal reference bandwidth as initial condition    if (missing(Hstart))        Hstart <- (4/(n*(d + 2)))^(2/(d + 4)) * var(x.star)    else           Hstart <- Sinv12 %*% Hstart %*% Sinv12    Hstart <- matrix.sqrt(Hstart)     psi4.mat <- psimat.nd(x.star.sub, d=d, nstage=nstage, pilot=pilot, binned=binned, bin.par=bin.par.sub)          ## PI is estimate of AMISE    pi.temp <- function(diagH)    {       H <- diag(diagH) %*% diag(diagH)      pi.temp <- 1/(det(H)^(1/2)*n)*RK + 1/4* t(vech(H)) %*% psi4.mat %*% vech(H)    return(drop(pi.temp))     }        result <- optim(diag(Hstart), pi.temp, method="BFGS")    H <- diag(result$par) %*% diag(result$par)    ## back-transform  if (pre=="scale") S12 <- diag(sqrt(diag(var(x))))  else if (pre=="sphere") S12 <- matrix.sqrt(var(x))  H <- S12 %*% H %*% S12    return(H)  }}################################################################################ Cross-validation bandwidth selectors############################################################################################################################################################### Computes the least squares cross validation LSCV function for 2 to 6 dim# # Parameters# x - data values# H - bandwidth matrix## Returns# LSCV(H)###############################################################################lscv.1d.binned <- function(xbin.par, h){  n <- sum(xbin.par$counts)  lscv1 <- n^2*bkfe(x=xbin.par$counts, range.x=xbin.par$range.x[[1]], drv=0, bandwidth=sqrt(2)*h, binned=TRUE)  lscv2 <- n^2*(bkfe(x=xbin.par$counts, range.x=xbin.par$range.x[[1]], drv=0, bandwidth=h, binned=TRUE) - dnorm(0, mean=0, sd=h)/n)   lscv <- lscv1/n^2 - 2/(n*(n-1))*lscv2  return(lscv)}lscv.mat <- function(x, H, binned=FALSE, bin.par){  n <- nrow(x)  ##d <- ncol(x)  lscv1 <- dmvnorm.sum(x, 2*H, inc=1, binned=binned, bin.par=bin.par)  lscv2 <- dmvnorm.sum(x, H, inc=0, binned=binned, bin.par=bin.par)    return(lscv1/n^2 - 2/(n*(n-1))*lscv2)     }   ################################################################################ Finds the bandwidth matrix that minimises LSCV for 2 to 6 dim# # Parameters# x - data values# Hstart - initial bandwidth matrix## Returns# H_LSCV###############################################################################hlscv <- function(x, binned=TRUE, bgridsize){  if (any(duplicated(x)))    stop("Data contain duplicated values: LSCV is not well-behaved in this case")  n <- length(x)  d <- 1  hnorm <- sqrt((4/(n*(d + 2)))^(2/(d + 4)) * var(x))  if (missing(bgridsize)) bgridsize <- 401    xbin.par <- dfltCounts.ks(x, gridsize=bgridsize)  lscv.1d.temp <- function(h)  {    return(lscv.1d.binned(x=xbin.par, h=h))  }  opt <- optimise(f=lscv.1d.temp, interval=c(0.2*hnorm, 5*hnorm, tol=.Machine$double.eps))$minimum  return(opt)    }  Hlscv <- function(x, Hstart){  if (any(duplicated(x)))    stop("Data contain duplicated values: LSCV is not well-behaved in this case")  n <- nrow(x)  d <- ncol(x)  ##RK <- (4*pi)^(-d/2)  ## use normal reference selector as initial condn  if (missing(Hstart))     Hstart <- matrix.sqrt((4/ (n*(d + 2)))^(2/(d + 4)) * var(x))    lscv.mat.temp <- function(vechH)  {    ##  ensures that H is positive definite    H <- invvech(vechH) %*% invvech(vechH)    return(lscv.mat(x=x, H=H, binned=FALSE))  }  result <- optim(vech(Hstart), lscv.mat.temp, method="Nelder-Mead")                                        #control=list(abstol=n^(-10*d)))        return(invvech(result$par) %*% invvech(result$par))}################################################################################ Finds the diagonal bandwidth matrix that minimises LSCV for 2 to 6 dim# # Parameters# x - data values# Hstart - initial bandwidth matrix## Returns# H_LSCV,diag###############################################################################Hlscv.diag <- function(x, Hstart, binned=FALSE, bgridsize){  n <- nrow(x)  d <- ncol(x)  RK <- (4*pi)^(-d/2)    if (missing(Hstart))     Hstart <- matrix.sqrt((4/ (n*(d + 2)))^(2/(d + 4)) * var(x))  if (missing(bgridsize) & binned) bgridsize <- default.gridsize(d)  if (d > 4) binned <- FALSE  if (binned)  {    H.max <- (((d+8)^((d+6)/2)*pi^(d/2)*RK)/(16*(d+2)*n*gamma(d/2+4)))^(2/(d+4))* var(x)    ## linear binning    bin.par <- binning(x=x, bgridsize=bgridsize, H=sqrt(diag(H.max)))  }    lscv.mat.temp <- function(diagH)  {    H <- diag(diagH^2)    return(lscv.mat(x=x, H=H, binned=binned, bin.par=bin.par))  }  result <- optim(diag(Hstart), lscv.mat.temp, method="Nelder-Mead")                   return(diag(result$par^2))}################################################################################ Computes the biased cross validation BCV function for 2-dim# # Parameters# x - data values# H1, H2 - bandwidth matrices## Returns# BCV(H)###############################################################################bcv.mat <- function(x, H1, H2){  n <- nrow(x)  d <- 2  psi40 <- dmvnorm.deriv.2d.sum(x, Sigma=H2, r=c(4,0), inc=0)  psi31 <- dmvnorm.deriv.2d.sum(x, Sigma=H2, r=c(3,1), inc=0)  psi22 <- dmvnorm.deriv.2d.sum(x, Sigma=H2, r=c(2,2), inc=0)  psi13 <- dmvnorm.deriv.2d.sum(x, Sigma=H2, r=c(1,3), inc=0)  psi04 <- dmvnorm.deriv.2d.sum(x, Sigma=H2, r=c(0,4), inc=0)      coeff <- c(1, 2, 1, 2, 4, 2, 1, 2, 1)  psi.fun <- c(psi40, psi31, psi22, psi31, psi22, psi13, psi22, psi13,psi04)/(n*(n-1))  psi4.mat <- matrix(coeff * psi.fun, nc=3, nr=3)    RK <- (4*pi)^(-d/2)   bcv <- drop(n^(-1)*det(H1)^(-1/2)*RK + 1/4*t(vech(H1)) %*% psi4.mat %*% vech(H1))    return(list(bcv=bcv, psimat=psi4.mat))}################################################################################ Find the bandwidth matrix that minimises the BCV for 2-dim# # Parameters# x - data values
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -