📄 kda.r

📁 r软件另一款可以计算核估计的软件包需安装r软件
💻 R
📖 第 1 页 / 共 2 页
字号:
12 下一页
############################################################################### Kernel discriminant analysis############################################################################################################################################################### Find bandwidths for each class in training set, for 2- to 6-dim ## Parameters# x - data values# group - group variable# bw - type of bandwidth selector# nstage, pilot, pre - parameters for plugin bandwidths# diag - FALSE - use full b/w matrices#      - TRUE - use diag b/w matrices## Returns# Matrix of bandwidths for each group in training set###############################################################################hkda <- function(x, x.group, bw="plugin", nstage=2, binned=TRUE, bgridsize){  grlab <- sort(unique(x.group))  m <- length(grlab)  bw <- substr(tolower(bw),1,1)  hs <- numeric(0)  if (missing(bgridsize)) bgridsize <- 401    for (i in 1:m)  {    y <- x[x.group==grlab[i]]    if (bw=="p")      h <- hpi(y, nstage=nstage, binned=TRUE, bgridsize=bgridsize)     hs <- c(hs, h)  }  return(hs)}   Hkda <- function(x, x.group, Hstart, bw="plugin", nstage=2, pilot="samse",                 pre="sphere", binned=FALSE, bgridsize){  d <- ncol(x)  grlab <- sort(unique(x.group))  m <- length(grlab)  bw <- substr(tolower(bw),1,1)  Hs <- numeric(0)  if (missing(bgridsize) & binned) bgridsize <- default.gridsize(d)    for (i in 1:m)  {    y <- x[x.group==grlab[i],]    if (!missing(Hstart))     {      Hstarty <- Hstart[((i-1)*d+1) : (i*d),]      if (bw=="l")                H <- Hlscv(y, Hstart=Hstarty)      else if (bw=="s")        H <- Hscv(y, pre=pre, Hstart=Hstarty, binned=binned, bgridsize=bgridsize)      else if (bw=="p")        H <- Hpi(y, nstage=nstage, pilot=pilot, pre=pre, Hstart=Hstarty,                 binned=binned, bgridsize=bgridsize)     }    else    {      if (bw=="l")        H <- Hlscv(y)      else if (bw=="s")        H <- Hscv(y, pre=pre, binned=binned, bgridsize=bgridsize)      else if (bw=="p")        H <- Hpi(y, nstage=nstage, pilot=pilot, pre=pre, binned=binned, bgridsize=bgridsize)    }    Hs <- rbind(Hs, H)  }  return(Hs)   }Hkda.diag <- function(x, x.group, bw="plugin", nstage=2, pilot="samse",                 pre="sphere", binned=FALSE, bgridsize){  d <- ncol(x)  grlab <- sort(unique(x.group))  m <- length(grlab)  bw <- substr(tolower(bw),1,1)  Hs <- numeric(0)  if (missing(bgridsize) & binned) bgridsize <- default.gridsize(d)  for (i in 1:m)  {    y <- x[x.group==grlab[i],]    if (bw=="l")      H <- Hlscv.diag(y, binned=binned, bgridsize=bgridsize)    else if (bw=="p")       H <- Hpi.diag(y, nstage=nstage, pilot=pilot, pre=pre, binned=binned, bgridsize=bgridsize)    else if (bw=="s")      H <- Hscv.diag(y, pre=pre, binned=binned, bgridsize=bgridsize)    Hs <- rbind(Hs, H)  }  return(Hs)   }################################################################################ Classify data set according to discriminant analysis based on training data# for 1- to 6-dim## Parameter# x - training data# x.group - group variable for x# y - data values to be classified# Hs - bandwidth matrices# prior.prob - prior probabilities## Returns# Group classification of data set y###############################################################################kda <- function(x, x.group, Hs, hs, y, prior.prob=NULL){  if (is.vector(x))  {    disc.gr <- kda.1d(x=x, x.group=x.group, hs=hs, y=y, prior.prob=prior.prob)  }  else  {      if (is.data.frame(x)) x <- as.matrix(x)    if (is.data.frame(y)) y <- as.matrix(y)    gr <- sort(unique(x.group))    ##d <- ncol(x)       ## if prior.prob is NULL then use sample proportions    if (is.null(prior.prob))    {      prior.prob <- rep(0, length(gr))      for (j in 1:length(gr))        prior.prob[j] <- length(which(x.group==gr[j]))      prior.prob <- prior.prob/nrow(x)    }        if (!(identical(all.equal(sum(prior.prob), 1), TRUE)))        stop("Sum of prior weights not equal to 1")        ## Compute KDE and weighted KDE     m <- length(gr)    fhat <- kda.kde(x, x.group, Hs=Hs, eval.points=y)    fhat.wt <- matrix(0, ncol=m, nrow=nrow(y))          for (j in 1:m)      fhat.wt[,j] <- fhat$est[[j]]* prior.prob[j]        ## Assign y according largest weighted density value     disc.gr.temp <- apply(fhat.wt, 1, which.max)        disc.gr <- gr    for (j in 1:m)    {      ind <- which(disc.gr.temp==j)      disc.gr[ind] <- gr[j]    }  }    return(disc.gr) }kda.1d <- function(x, x.group, hs, y, prior.prob=NULL){   gr <- sort(unique(x.group))  # if prior.prob is NULL then use sample proportions  if (is.null(prior.prob))  {    prior.prob <- rep(0, length(gr))    for (j in 1:length(gr))      prior.prob[j] <- length(which(x.group==gr[j]))    prior.prob <- prior.prob/length(x)  }    if (!(identical(all.equal(sum(prior.prob), 1), TRUE)))      stop("Sum of prior weights not equal to 1")  ## Compute KDE and weighted KDE   m <- length(gr)  fhat <- kda.kde(x, x.group, hs=hs, eval.points=y)  fhat.wt <- matrix(0, ncol=m, nrow=length(y))      for (j in 1:m)    fhat.wt[,j] <- fhat$estimate[[j]]* prior.prob[j]  ## Assign y according largest weighted density value   disc.gr.temp <- apply(fhat.wt, 1, which.max)  disc.gr <- gr  for (j in 1:m)  {    ind <- which(disc.gr.temp==j)    disc.gr[ind] <- gr[j]  }   return(disc.gr) }################################################################################ Compares true group classification with an estimated one## Parameters# group - true group variable# est.group - estimated group variable## Returns# List with components# comp - cross-classification table of groupings - true groups are the rows,#        estimated groups are the columns# error - total mis-classification rate###############################################################################compare <- function(x.group, est.group, by.group=FALSE){  if (length(x.group)!=length(est.group))    stop("Group label vectors not the same length")    grlab <- sort(unique(x.group))  m <- length(grlab)  comp <- matrix(0, nr=m, nc=m)    for (i in 1:m)    for (j in 1:m)      comp[i,j] <- sum((x.group==grlab[i]) & (est.group==grlab[j]))    if (by.group)  {    er <- vector()    for (i in 1:m)      er[i] <- 1-comp[i,i]/rowSums(comp)[i]    er <- matrix(er, nc=1)    er <- rbind(er, 1 - sum(diag(comp))/sum(comp))     rownames(er) <- c(as.character(paste(grlab, "(true)")), "Total")    colnames(er) <- "error"      }  else     er <- 1 - sum(diag(comp))/sum(comp)    comp <- cbind(comp, rowSums(comp))  comp <- rbind(comp, colSums(comp))  colnames(comp) <- c(as.character(paste(grlab, "(est.)")), "Total")  rownames(comp) <- c(as.character(paste(grlab, "(true)")), "Total")  return(list(cross=comp, error=er)) }################################################################################ Computes cross-validated misclassification rates (for use when test data is# not independent of training data) for KDA## Parameters# x - training data# x.group - group variable for x# y - data values to be classified# Hs - bandwidth matrices# prior.prob - prior probabilities## Returns# List with components# comp - cross-classification table of groupings - true groups are the rows,#        estimated groups are the columns# error - total mis-classification rate###############################################################################compare.kda.cv <- function(x, x.group, bw="plugin",    prior.prob=NULL, Hstart, by.group=FALSE, trace=FALSE, binned=FALSE,    bgridsize, recompute=FALSE, ...){  ## 1-d  if (is.vector(x))  {    n <- length(x)    h <- hkda(x, x.group, bw=bw, binned=binned, bgridsize=bgridsize, ...)    gr <- sort(unique(x.group))     kda.cv.gr <- x.group    for (i in 1:n)    {      h.mod <- h      ## find group that x[i] belongs to       ind <- which(x.group[i]==gr)      indx <- x.group==gr[ind]      indx[i] <- FALSE      if (substr(bw,1,1)=="p")        h.temp <- hpi(x[indx], binned=binned, bgridsize=bgridsize, ...)      h.mod[ind] <- h.temp          ## recompute KDA estimate of groups with x[i] excluded            if (trace)        cat(paste("Processing data item:", i, "\n"))            kda.cv.gr[i] <- kda(x[-i], x.group[-i], hs=h.mod, y=x, prior.prob=prior.prob)[i]    }    return(compare(x.group, kda.cv.gr, by.group=by.group))   }  ## multi-dimensional     n <- nrow(x)  d <- ncol(x)    if (!missing(Hstart))    H <- Hkda(x, x.group, bw=bw, Hstart=Hstart, binned=binned, bgridsize=bgridsize, ...)  else    H <- Hkda(x, x.group, bw=bw, binned=binned, bgridsize=bgridsize, ...)  ### classify data x using KDA rules based on x itself  ##kda.group <- kda(x, x.group, Hs=H, y=x, prior.prob=prior.prob)  ##comp <- compare(x.group, kda.group)   gr <- sort(unique(x.group))   kda.cv.gr <- x.group    for (i in 1:n)  {    H.mod <- H    ### find group that x[i] belongs to     ind <- which(x.group[i]==gr)    indx <- x.group==gr[ind]    indx[i] <- FALSE    if (recompute)    {      ## compute b/w matrix for that group with x[i] excluded      if (!missing(Hstart))      {          Hstart.temp <- Hstart[((ind-1)*d+1):(ind*d),]                if (substr(bw,1,1)=="p")          H.temp <- Hpi(x[indx,], Hstart=Hstart.temp, binned=binned, bgridsize=bgridsize,...)        else if (substr(bw,1,1)=="s")          H.temp <- Hscv(x[indx,],  Hstart=Hstart.temp, binned=binned, bgridsize=bgridsize,...)        else if (substr(bw,1,1)=="l")           H.temp <- Hlscv(x[indx,],  Hstart=Hstart.temp, ...)      }      else      {        if (substr(bw,1,1)=="p")          H.temp <- Hpi(x[indx,], binned=binned, bgridsize=bgridsize, ...)        else if (substr(bw,1,1)=="s")          H.temp <- Hscv(x[indx,], binned=binned, bgridsize=bgridsize, ...)        else if (substr(bw,1,1)=="l")          H.temp <- Hlscv(x[indx,], ...)       }            H.mod[((ind-1)*d+1):(ind*d),] <- H.temp    }    ## recompute KDA estimate of groups with x[i] excluded          if (trace)      cat(paste("Processing data item:", i, "\n"))    kda.cv.gr[i] <- kda(x[-i,], x.group[-i], Hs=H.mod, y=x, prior.prob=prior.prob)[i]  }    return(compare(x.group, kda.cv.gr, by.group=by.group)) }################################################################################## Same as compare.kda.cv except uses diagonal b/w matrices###############################################################################compare.kda.diag.cv <- function(x, x.group, bw="plugin", prior.prob=NULL,   by.group=FALSE, trace=FALSE, binned=FALSE, bgridsize, recompute=FALSE, ...){  n <- nrow(x)  d <- ncol(x)  H <- Hkda.diag(x, x.group, bw=bw, binned=binned, bgridsize=bgridsize, ...)  ##kda.group <- kda(x, x.group, Hs=H, y=x, prior.prob=prior.prob)  ##comp <- compare(x.group, kda.group)   gr <- sort(unique(x.group))   kda.cv.gr <- x.group    for (i in 1:n)  {    H.mod <- H    if (recompute)    {      ind <- which(x.group[i]==gr)      indx <- x.group==gr[ind]      indx[i] <- FALSE      if (substr(bw,1,1)=="p")        H.temp <- Hpi.diag(x[indx,],  binned=binned, bgridsize=bgridsize, ...)      else if (substr(bw,1,1)=="l")        H.temp <- Hlscv.diag(x[indx,], binned=binned, bgridsize=bgridsize, ...)            H.mod[((ind-1)*d+1):(ind*d),] <- H.temp    }        if (trace)      cat(paste("Processing data item:", i, "\n"))    kda.cv.gr[i] <- kda(x[-i,], x.group[-i], Hs=H.mod, y=x, prior.prob=prior.prob)[i]    }    return(compare(x.group, kda.cv.gr, by.group=by.group)) }################################################################################ KDEs of individual densities for KDA - 1- to 3-dim## Parameters# x - data values# group - group variable# Hs - bandwidth matrices## Returns# List with components (class dade)# x - list of data values# eval.points - evaluation points of dnesity estimate# estimate - list of density estimate# H - list of bandwidth matrices##############################################################################kda.kde <- function(x, x.group, Hs, hs, prior.prob=NULL, gridsize, xmin, xmax, supp=3.7, eval.points=NULL, binned=FALSE, bgridsize){  if (is.vector(x))  {    if (missing(gridsize))  gridsize <- 101    if (missing(bgridsize)) bgridsize <- 401    fhat.list <- kda.kde.1d(x=x, x.group=x.group, hs=hs, prior.prob=prior.prob, gridsize=gridsize, eval.points=eval.points, supp=supp, binned=binned, bgridsize=bgridsize, xmin=xmin, xmax=xmax)  }  else  {    if (is.data.frame(x)) x <- as.matrix(x)    grlab <- sort(unique(x.group))    m <- length(grlab)    d <- ncol(x)    ## find largest bandwidth matrix to initialise grid    detH <- vector()     for (j in 1:m)      detH[j] <- det(Hs[((j-1)*d+1) : (j*d),])      Hmax.ind <- which.max(detH)    Hmax <- Hs[((Hmax.ind-1)*d+1) : (Hmax.ind*d),]
12 下一页
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -