📄 kde.r

📁 r软件另一款可以计算核估计的软件包需安装r软件
💻 R
📖 第 1 页 / 共 2 页
字号:
12 下一页
###############################################################################
# Multivariate kernel density estimators
###############################################################################


###############################################################################
# Generate grid over a set of points
#
# Parameters
# x - data points
# H - bandwidth matrix
# tol - tolerance = extra coverage exceeding the range of x   
# gridsize - number of points for each direction
#
# Returns
# gridx - list of intervals, one for each co-ord direction so that
#         gridx[[1]] x gridx[[2]] x ... x gridx[[d]] is the grid
# stepsize - vector of step sizes 
###############################################################################

make.grid.ks <- function(x, H, tol, gridsize, xmin, xmax, gridtype)
{
  d <- ncol(x)
  tol.H <-  tol * diag(H)
  if (missing(xmin))
     xmin <- apply(x, 2, min) - tol.H
  if (missing(xmax))
     xmax <- apply(x, 2, max) + tol.H
  
  stepsize <- rep(0, d)
  gridx <- numeric(0)
  if (length(gridsize)==1)
    gridsize <- rep(gridsize, d)

  if (missing(gridtype))
   gridtype <- rep("linear", d)

  gridtype.vec<- rep("", d)
  
  for (i in 1:d)
  {
    gridtype1 <- tolower(substr(gridtype[i],1,1))
    if (gridtype1=="l")
    {  
      gridx <- c(gridx, list(seq(xmin[i], xmax[i], length=gridsize[i])))
      stepsize[i] <- abs(gridx[[i]][1] - gridx[[i]][2])
      gridtype.vec[i] <- "linear"
    }
    else if (gridtype1=="s")
    {
      gridx.temp <- seq(sign(xmin[i])*sqrt(abs(xmin[i])), sign(xmax[i])*sqrt(abs(xmax[i])), length=gridsize[i])
      gridx <- c(gridx, list(sign(gridx.temp) * gridx.temp^2))
      stepsize[i] <- NA
      gridtype.vec[i] <- "sqrt"
    }
  }
  
  gridx <- c(gridx, list(stepsize = stepsize, gridtype=gridtype.vec))
    
  return(gridx)
}  


###############################################################################
# Generate kernel (rectangular) support at data point
# 
# Parameters
# x - data points
# H - bandwidth matrix
# tol - tolerance = extra coverage exceeding the range of x 
#
# Returns
# list of min and max points of support (here we parameterise rectangles
# by their min = lower left co-ord and max = upper right coord)
###############################################################################

make.supp <- function(x, H, tol)
{
  n <- nrow(x)
  d <- ncol(x)
  tol.H <- tol * diag(H)
  xmin <- matrix(0, nr=n, nc=d)
  xmax <- matrix(0, nr=n, nc=d)

  for (i in 1:n)
  {
    xmin[i,] <- x[i,] - tol.H
    xmax[i,] <- x[i,] + tol.H 
  }
           
  return(list(xmin = xmin, xmax = xmax))
}


###############################################################################
# Find the grid points contained in kernel support rectangles 
#
# Parameters
# gridx - grid (list of subdivided intervals)
# rectx - rectangles (list of min and max points) 
#
# Returns
# list of min and max points of the grid for each rectangle 
###############################################################################

find.gridpts <- function(gridx, suppx)
{
  xmax <- suppx$xmax
  xmin <- suppx$xmin
  d <- ncol(xmax)
  n <- nrow(xmax)
  gridpts.min <- matrix(0, nc=d, nr=n)
  gridpts.max <- gridpts.min
  
  for (i in 1:n)
    for (j in 1:d)    
    {
      # find index of last element of gridx smaller than min support  
      tsum <- sum(xmin[i,j] >= gridx[[j]])
      if (tsum==0)
        gridpts.min[i,j] <- 1
      else
        gridpts.min[i,j] <- tsum

      # find index of first element gridx greater than max support 
      gridpts.max[i,j] <- sum(xmax[i,j] >= gridx[[j]])
    }   
        
  return(list(xmin=gridpts.min, xmax=gridpts.max))
} 

###############################################################################
# Multivariate kernel density estimate using normal kernels
#
# Parameters
# x - points
# H - bandwidth matrix
# gridsize - number of interval points in grid
# supp - effective support of kernel
# eval.points - compute density estimate at these points (if missing
#            and dim(x) = 2, 3 compute density estimate over grid)  
# eval.levels - compute 3-D in 2-D slices taken at these level curves   
#
# Returns
# list with first d components with the points that the density
# estimate is evaluated at, and values of the density estimate 
###############################################################################


kde <- function(x, H, h, gridsize, gridtype, xmin, xmax, supp=3.7, eval.points, binned=FALSE, bgridsize,  positive=FALSE, adj.positive)
{
  if (is.vector(x))
  {
    if (missing(H)) d <- 1
    else
    {
      if (is.vector(H)) d <- 1
      else {x <- matrix(x, nrow=1); d <- ncol(x)}
    }
  }
  else d <- ncol(x)

  ## compute binned estimator
  if (binned)
  {
    if (!missing(eval.points))
      stop("Both binned=TRUE and eval.points are non-empty.")
    
    if (missing(bgridsize)) bgridsize <- default.gridsize(d)
    
    if (positive & d==1)
    {
      y <- log(x)
      fhat <- kde.binned(x=y, H=H, h=h, bgridsize=bgridsize, xmin=xmin, xmax=xmax)
      fhat$estimate <- fhat$estimate/exp(fhat$eval.points)
      fhat$eval.points <- exp(fhat$eval.points)
      fhat$x <- x
    }
    else
      fhat <- kde.binned(x=x, H=H, h=h, bgridsize=bgridsize, xmin=xmin, xmax=xmax)
  }
  else
  {
    ## compute exact (non-binned) estimator
    if (missing(gridsize)) gridsize <- default.gridsize(d)
    
    ## 1-dimensional    
    if (d==1)
    {
      if (!missing(H) & !missing(h))
        stop("Both H and h are both specified.")

      if (missing(h))
        h <- sqrt(H)

      if (missing(eval.points))
        fhat <- kde.grid.1d(x=x, h=h, gridsize=gridsize, supp=supp, positive=positive, xmin=xmin, xmax=xmax, adj.positive=adj.positive, gridtype=gridtype)
       else
         fhat <- kde.points.1d(x=x, h=h, eval.points=eval.points, positive=positive, adj.positive=adj.positive)
     }
     ## multi-dimensional
     else
     {  
       if (is.data.frame(x)) x <- as.matrix(x)

       if (missing(eval.points))
       {
         if (d==2)
           fhat <- kde.grid.2d(x=x, H=H, gridsize=gridsize, supp=supp, xmin=xmin, xmax=xmax, gridtype=gridtype)
         else if (d == 3)
           fhat <- kde.grid.3d(x=x, H=H, gridsize=gridsize, supp=supp, xmin=xmin, xmax=xmax, gridtype=gridtype) 
         else 
           stop("Need to specify eval.points for more than 3 dimensions")
       }
       else
         fhat <- kde.points(x, H, eval.points)
     }

   }

   fhat$binned <- binned
   ##fhat$gridtype <- gridtype

  ## add variable names
  if (d==1)
  {
    x.names <- deparse(substitute(x))
  }
  else
  {  
    x.names <- colnames(x)
    if (is.null(x.names))
    {
      x.names <- strsplit(deparse(substitute(x)), "\\[")[[1]][1]
      x.names <- paste(x.names, "[,", 1:d,"]",sep="") 
    }
  }
  fhat$names <- x.names
  class(fhat) <- "kde"
  

  return(fhat)
 }

###############################################################################
### Multivariate binned kernel density estimate using normal kernels
###############################################################################


kde.binned <- function(x, H, h, bgridsize, xmin, xmax, bin.par)
{
  ## linear binning
  if (missing(bin.par))
  {
    if (is.vector(x)) d <- 1
    else d <- ncol(x)

    if (d==1)
      if (missing(H)) H <- as.matrix(h^2)
      else {h <- sqrt(H); H <- as.matrix(H)}

    if (!is.diagonal(H) & d > 1)
      stop("Binned estimation defined for diagonal H only")
     
    if (missing(bgridsize)) bgridsize <- default.gridsize(d)
    bin.par <- binning(x=x, H=H, h, bgridsize, xmin, xmax, supp=3.7)
  }
  else
  {
    if (!is.list(bin.par$eval.points)) { d <- 1; bgridsize <- length(bin.par$eval.points)}
    else  { d <- length(bin.par$eval.points); bgridsize <- sapply(bin.par$eval.points, length)} 

    if (d==1)
      if (missing(H)) H <- as.matrix(h^2)
      else {h <- sqrt(H); H <- as.matrix(H)}
  }
  
  if (d==1)
    range.x <- list(range(bin.par$eval.points))
  else
    range.x <- lapply(bin.par$eval.points, range)
  
  fhat.grid <- drvkde(x=bin.par$counts, drv=rep(0,d), bandwidth=sqrt(diag(H)), binned=TRUE, range.x=range.x, se=FALSE, gridsize=bgridsize)
  eval.points <- fhat.grid$x.grid
  fhat.grid <- fhat.grid$est
  fhat.grid[fhat.grid<0] <- 0
  
  if (missing(x)) x <- NULL
  
  if (d==1)
     fhat <- list(x=x, eval.points=unlist(eval.points), estimate=fhat.grid, H=h^2, h=h)
  else
    fhat <- list(x=x, eval.points=eval.points, estimate=fhat.grid, H=H)

  return(fhat)
}


 #############################################################################
 ## Univariate kernel density estimate on a grid
 #############################################################################

 kde.grid.1d <- function(x, h, gridsize, supp=3.7, positive=FALSE, adj.positive, xmin, xmax, gridtype)
 {
   if (missing(xmin)) xmin <- min(x) - h*supp
   if (missing(xmax)) xmax <- max(x) + h*supp
   if (missing(gridtype)) gridtype <- "linear"
   
   if (positive)
   {
     if (missing(adj.positive)) adj.positive <- abs(min(x))
     y <- log(x + adj.positive)  ## transform positive data x to real line

     gridx <- seq(max(0, xmin), xmax, length=gridsize)
     gridy <- log(gridx + adj.positive)
     gridtype.vec <- "linear" 
   }
   else
   {
     y <- x
     gridtype1 <- tolower(substr(gridtype,1,1))
     if (gridtype1=="l")
     {
       gridy <- seq(xmin, xmax, length=gridsize)
       gridtype.vec <- "linear"
     }
     else if (gridtype1=="s")
     {
       gridy.temp <- seq(sign(xmin)*sqrt(abs(xmin)), sign(xmax)*sqrt(abs(xmax)), length=gridsize)
       gridy <- sign(gridy.temp) * gridy.temp^2
       gridtype.vec <- "sqrt"
     }
   }
   n <- length(y)
 
   est <- dnorm.mixt(x=gridy, mus=y, sigmas=rep(h, n), props=rep(1,n)/n)
   fhat <- list(x=y, eval.points=gridy, estimate=est, h=h, H=h^2, gridtype=gridtype.vec)
 
   if (positive)
   {
     ## compute transformation KDE
     fhat$estimate <- fhat$estimate / exp(gridy)
     fhat$x <- x
     fhat$eval.points <- gridx # exp(gridy) - adj.positive
   }
   
   class(fhat) <- "kde"
   
   return(fhat)
}

###############################################################################
# Bivariate kernel density estimate using normal kernels, evaluated over grid
#
# Parameters
# x - data points
# H - bandwidth matrix
# gridsize - number of interval points in grid
# supp - effective support of kernel
#
# Returns
# list with fields
# x - data points
# eval.points - points that KDE is evaluated at
# estimate - KDE evaluated at eval.points 
# H - bandwidth matrix 
###############################################################################

kde.grid.2d <- function(x, H, gridsize, supp, gridx=NULL, grid.pts=NULL, xmin, xmax, gridtype)
{
  # initialise grid 
  n <- nrow(x)
  if (is.null(gridx))
    gridx <- make.grid.ks(x, matrix.sqrt(H), tol=supp, gridsize=gridsize, xmin=xmin, xmax=xmax, gridtype=gridtype) 
       
  suppx <- make.supp(x, matrix.sqrt(H), tol=supp)

  if (is.null(grid.pts))
    grid.pts <- find.gridpts(gridx, suppx)    
  fhat.grid <- matrix(0, nrow=length(gridx[[1]]), ncol=length(gridx[[2]]))
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -