📄 cde.bandwidths.r

📁 r软件另一软件包计算核估计必须先安装r软件
💻 R
📖 第 1 页 / 共 2 页
字号:
上一页 12
    idx <- idx[!is.na(idx)]
    if(idx==1 | idx==na)
    {
        warning("No minimum found")
        a <- NA
    }
    else
        a <- a.grid[idx]
    return(list(a=a,a.grid=a.grid,q=q))
}

"cdeband.bootstrap" <- function(x,y,a.grid,b.grid,m=25,nx=30,ny=25,y.margin)
{
    ## Fit parametric model and calculate parametric conditional density
    df <- data.frame(x=x,y=y)
    aic <- numeric(4)
    n <- length(x)
    fit1 <- lm(y ~ 1,data=df)
    aic[1] <- deviance(fit1) + 2*(n-fit1$df.residual)*deviance(fit1)/fit1$df.resid
    fit2 <- lm(y ~ x,data=df)
    aic[2] <- deviance(fit2) + 2*(n-fit2$df.residual)*deviance(fit2)/fit2$df.resid
    fit3 <- lm(y ~ x + I(x^2),data=df)
    aic[3] <- deviance(fit3) + 2*(n-fit3$df.residual)*deviance(fit3)/fit3$df.resid
    fit4 <- lm(y~ x+I(x^2)+I(x^3),data=df)
    aic[4] <- deviance(fit4) + 2*(n-fit4$df.residual)*deviance(fit4)/fit4$df.resid
    fit <- switch((1:4)[aic==min(aic)],fit1,fit2,fit3,fit4)
    rse <- summary(fit)$sigma
    fits <- fitted(fit)
    n <- length(x)
    if(missing(y.margin))
        y.margin <- seq(min(y),max(y),l=ny)
    else
        ny <- length(y.margin)
    if(length(x)<nx)
    {
        x.margin <- sort(x)
        nx <- length(x)
    }
    else
        x.margin <- sort(sample(x,nx))
    fit.grid <- approx(x,fits,xout=x.margin,rule=2)$y  # Quicker than predict and works when constant model used.
    truecde <- list(x=x.margin,y=y.margin,z=matrix(NA,nx,ny))
    for(i in 1:nx)
        truecde$z[i,] <- dnorm(y.margin,fit.grid[i],rse)

    ## Get bandwidth grids

    bands <- cdeband.rules0(x,y)
    if(missing(a.grid))
        a.grid <- bands$a*seq(0.4,2,0.2)
    if(missing(b.grid))
        b.grid <- bands$b*seq(0.4,2,0.2)

    ## Simulate bootstrap samples

    m <- max(m,5)
    bootsamples <- matrix(NA,nrow=n,ncol=m)
    for(i in 1:m)
        bootsamples[,i] <- fits + rnorm(n,0,rse)

    ## Calculate IMSE

    diff.cde <- numeric(m)
    na <- length(a.grid)
    nb <- length(b.grid)
    delta <- y.margin[2]-y.margin[1]
    imse <- matrix(NA,na,nb)
    cat("\n Trying (a,b)=")

    ## First pass

    for(i in 1:na)
    {
        a <- a.grid[i]
        for(j in 1:nb)
        {
            b <- b.grid[j]
            cat("(",round(a,3),",",round(b,3),") ",sep="")
            for(k in 1:3)
            {
                bootcde <- cde(x,bootsamples[,k],deg=0,link="identity",a=a,b=b,x.margin=x.margin,y.margin=y.margin)
                diff.cde[k] <- sum((bootcde$z - truecde$z)^2)/(m*nx)
            }
            imse[i,j] <- mean(diff.cde[1:3])
        }
    }
    cat("\n")

    ## Second pass near minimum

    idx <- (imse==min(imse))
    idx.a <- max(3,(1:na)[apply(idx,1,sum)==1]) + c(-2:2)
    idx.b <- max(3,(1:nb)[apply(idx,2,sum)==1]) + c(-2:2)
    idx.a <- idx.a[idx.a>0 & idx.a <=na]
    idx.b <- idx.b[idx.b>0 & idx.b <=nb]

    for(i in idx.a)
    {
        a <- a.grid[i]
        for(j in idx.b)
        {
            b <- b.grid[j]
            cat("(",round(a,3),",",round(b,3),") ",sep="")
            for(k in 4:m)
            {
                bootcde <- cde(x,bootsamples[,k],deg=0,link="identity",a=a,b=b,x.margin=x.margin,y.margin=y.margin)
                diff.cde[k] <- sum((bootcde$z - truecde$z)^2)/(m*nx)
            }
            imse[i,j] <- (3*imse[i,j] + (m-3)*mean(diff.cde[4:m]))/m
        }
    }
    cat("\n")

    # Find minimum on grid
    idx <- (imse==min(imse))
    idx.a <- (1:na)[apply(idx,1,sum)>0]
    idx.b <- (1:nb)[apply(idx,2,sum)>0]
    best.a <- a.grid[idx.a[1]]
    best.b <- b.grid[idx.b[1]]
    bestmse <- imse[idx.a[1],idx.b[1]]

    ## Fit quadratic surface to points near minimum and find minimum of surface
    na <- length(idx.a)
    nb <- length(idx.b)
    if(na>2 & nb>2)
    {
        fit <- lm(imse ~ a + b + I(a^2) + I(b^2) + I(a*b),
                data=data.frame(imse=c(imse[idx.a,idx.b]),a=rep(a.grid[idx.a],nb),b=rep(b.grid[idx.b],rep(na,nb))))
        best.b <- (fit$coef[2]*fit$coef[6] - 2*fit$coef[3]*fit$coef[4])/(4*fit$coef[4]*fit$coef[5] - fit$coef[6]*fit$coef[6])
        best.a <- (best.b*fit$coef[6] + fit$coef[2])/(-2*fit$coef[4])
    }
    else if(nb>2) # na=1 or 2
    {
        for(i in 1:na)
        {
            fit <- lm(imse ~ b+I(b^2),data=data.frame(imse=imse[idx.a[i],idx.b],b=b.grid[idx.b]))
            bestb <- -0.5*fit$coef[2]/fit$coef[3]
            minimse <- predict(fit,newdata=data.frame(b=bestb))
            if(minimse<bestmse & bestb >= min(b.grid) & bestb <= max(b.grid))
            {
                best.b <- bestb
                best.a <- a.grid[idx.a[i]]
                bestmse <- minimse
            }
        }
    }
    else if(na>2) # nb=1 or 2
    {
        for(i in 1:nb)
        {
            fit <- lm(imse ~ a+I(a^2),data=data.frame(imse=imse[idx.a,idx.b[i]],a=a.grid[idx.a]))
            besta <- -0.5*fit$coef[2]/fit$coef[3]
            minimse <- predict(fit,newdata=data.frame(a=besta))
            if(minimse<bestmse  & besta >= min(a.grid) & besta <= max(a.grid))
            {
                best.a <- besta
                best.b <- b.grid[idx.b[i]]
                bestmse <- minimse
            }
        }
    }

    return(list(a=best.a,b=best.b,a.grid=a.grid,b.grid=b.grid,imse=imse*delta))
}

## This function called by cdeband.regress.

CDEband.regress <- function(x, y, a.grid, b, y.margin, penalty=4, tol=0.999)
{
    # Calculate v
    n <- length(x)
    ny <- length(y.margin)
    if(ny>1)
        delta <- y.margin[2]-y.margin[1]
    else
        delta <- 1
    v <- Kernel(y,y.margin,b)
    na <- length(a.grid)
    q <- numeric(na)
    cat("\n Trying a=")
    for(i in 1:na)
    {
        cat(round(a.grid[i],3)," ")
        w <- Kernel(x,x,a.grid[i])
        row.sum <- apply(w,1,sum)
        w <- w  / matrix(rep(row.sum,n),ncol=n)
        junk <- (v - (t(w) %*% v))^2
        diag.w <- diag(w)
        pen <- switch(penalty,1+2*diag.w,1/(1-diag.w)^2,exp(2*diag.w),(1+diag.w)/(1-diag.w),1/(1-2*diag.w))
        tmp <- apply(junk,1,sum)
        q[i] <- mean(tmp*pen,na.rm=TRUE)
#        if(sum(diag.w>tol)>0)
#            q[i] <- Inf
#        else
#        {
#            pen <- switch(penalty,1+2*diag.w,1/(1-diag.w)^2,exp(2*diag.w),(1+diag.w)/(1-diag.w),1/(1-2*diag.w))
#            q[i] <- mean(apply(junk,1,sum)*pen)
#        }
    }
    cat("\n")
    idx <- (1:na)[q==min(q)]
    if(idx==1 | idx==na)
    {
#        warning("No minimum found")
        a <- NA
    }
    else
        a <- a.grid[idx]
#    browser()
    return(list(a=a,a.grid=a.grid,q=q*delta))
}

cdeband.regress <- function(x,y,a.grid,b,ny=25,use.sample=FALSE,nx=100,y.margin,passes=2,usequad=TRUE,penalty=4,tol=0.999)
{
    ## Initialization

    bands <- cdeband.rules0(x,y)
    if(missing(a.grid))
        a.grid <- bands$a*seq(0.01,3,l=10)
    if(missing(b))
        b <- bands$b
    a.grid <- a.grid[a.grid>0]
    na <- length(a.grid)

    n <- length(x)
    if(use.sample & n > nx)
        idx <- sample(1:n,nx,replace=FALSE)
    else
        idx <- 1:n
    xx <- x[idx]
    yy <- y[idx]
    if(missing(y.margin))
        y.margin <- seq(min(y),max(y),l=ny)

    ## First pass

    first <- CDEband.regress(xx,yy,a.grid,b,y.margin,penalty=penalty,tol=tol)
    if(is.na(first$a)) # Expand grid
    {
        a.grid <- seq(a.grid[1]/20,1.1*a.grid[na],l=50)
        firstb <- CDEband.regress(xx,yy,a.grid,b,y.margin,penalty=penalty,tol=tol)
        first$a.grid <- c(first$a.grid,firstb$a.grid)
        idx <- order(first$a.grid)
        first$q <- c(first$q,firstb$q)[idx]
        first$a.grid <- first$a.grid[idx]
        first$a <- firstb$a
    }

    ## Second pass

    if(passes>1 & !is.na(first$a))
    {
        na <- length(first$a.grid)
        idx <- c(1:na)[first$a.grid==first$a]
        if(na>4)
            idx <- idx + (-2:2)
        else
            idx <- idx + (-1:1)
        idx <- idx[idx>=1 & idx<=na]
        newa.grid <- seq(0.95*first$a.grid[idx[1]],1.05*first$a.grid[idx[length(idx)]],l=length(a.grid))
        second <- CDEband.regress(xx,yy,newa.grid,b,y.margin,tol=tol,penalty=penalty)
        fulla.grid <- c(first$a.grid,second$a.grid)
        idx <- order(fulla.grid)
        fullq <- c(first$q,second$q)[idx]
        fulla.grid <- fulla.grid[idx]
        a <- second$a
    }
    else
    {
        a <- first$a
        fulla.grid <- first$a.grid
        fullq <- first$q
    }

    ## Quadratic solution

    if(!is.na(a) & usequad)
    {
        na <- length(fulla.grid)
        idx <- (c(1:na)[fulla.grid==a])[1]
        if(na>4)
            idx <- idx + (-2:2)
        else
            idx <- idx + (-1:1)
        idx <- idx[idx>=1 & idx<=na & fullq[idx] != Inf]
        if(length(idx)>3)
        {
            fit <- lm(q ~ a+I(a^2),data=data.frame(q=fullq[idx],a=fulla.grid[idx]))
            besta <- -0.5*fit$coef[2]/fit$coef[3]
            if(besta >0)
                a <- besta
        }
    }

    return(list(a=a,b=b,a.grid=fulla.grid,q=fullq))
}

"Lsquare" <- function(x,y,sdlinear=TRUE)
{
    # Fits linear model.
    # If sdlinear=TRUE, allows heteroskedastic errors.

    x <- c(x)
    y <- c(y)
    model <- lm(y~x)
    res <- residuals(model)
    pc <- sqrt(sum(res^2)/(length(x)-2))
    if(!sdlinear)
        return(list(pc=pc,dx=model$coef[2]))

    ## Initial estimate of heteroskedasticity
    model.res <- lm(abs(res)~x)

    ## Recalculate linear fit
    weights <- fitted(model.res)^(-2)
    model <- lm(y~x,weights=weights)

    ## Recalculate heteroskedasticity
    res <- residuals(model)
    f1 <- function(co,res,x)
    {
        sum((res * res - (co[1] + co[2] * x)^2)^2)
    }
    junk <- nlm(f1,model.res$coef,res=res,x=x)
    return(list(pc=pc,pl=junk$estimate[1],qx=junk$estimate[2],dx=model$coef[2]))
}
上一页 12
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -