📄 cde.bandwidths.r
字号:
idx <- idx[!is.na(idx)]
if(idx==1 | idx==na)
{
warning("No minimum found")
a <- NA
}
else
a <- a.grid[idx]
return(list(a=a,a.grid=a.grid,q=q))
}
"cdeband.bootstrap" <- function(x,y,a.grid,b.grid,m=25,nx=30,ny=25,y.margin)
{
## Fit parametric model and calculate parametric conditional density
df <- data.frame(x=x,y=y)
aic <- numeric(4)
n <- length(x)
fit1 <- lm(y ~ 1,data=df)
aic[1] <- deviance(fit1) + 2*(n-fit1$df.residual)*deviance(fit1)/fit1$df.resid
fit2 <- lm(y ~ x,data=df)
aic[2] <- deviance(fit2) + 2*(n-fit2$df.residual)*deviance(fit2)/fit2$df.resid
fit3 <- lm(y ~ x + I(x^2),data=df)
aic[3] <- deviance(fit3) + 2*(n-fit3$df.residual)*deviance(fit3)/fit3$df.resid
fit4 <- lm(y~ x+I(x^2)+I(x^3),data=df)
aic[4] <- deviance(fit4) + 2*(n-fit4$df.residual)*deviance(fit4)/fit4$df.resid
fit <- switch((1:4)[aic==min(aic)],fit1,fit2,fit3,fit4)
rse <- summary(fit)$sigma
fits <- fitted(fit)
n <- length(x)
if(missing(y.margin))
y.margin <- seq(min(y),max(y),l=ny)
else
ny <- length(y.margin)
if(length(x)<nx)
{
x.margin <- sort(x)
nx <- length(x)
}
else
x.margin <- sort(sample(x,nx))
fit.grid <- approx(x,fits,xout=x.margin,rule=2)$y # Quicker than predict and works when constant model used.
truecde <- list(x=x.margin,y=y.margin,z=matrix(NA,nx,ny))
for(i in 1:nx)
truecde$z[i,] <- dnorm(y.margin,fit.grid[i],rse)
## Get bandwidth grids
bands <- cdeband.rules0(x,y)
if(missing(a.grid))
a.grid <- bands$a*seq(0.4,2,0.2)
if(missing(b.grid))
b.grid <- bands$b*seq(0.4,2,0.2)
## Simulate bootstrap samples
m <- max(m,5)
bootsamples <- matrix(NA,nrow=n,ncol=m)
for(i in 1:m)
bootsamples[,i] <- fits + rnorm(n,0,rse)
## Calculate IMSE
diff.cde <- numeric(m)
na <- length(a.grid)
nb <- length(b.grid)
delta <- y.margin[2]-y.margin[1]
imse <- matrix(NA,na,nb)
cat("\n Trying (a,b)=")
## First pass
for(i in 1:na)
{
a <- a.grid[i]
for(j in 1:nb)
{
b <- b.grid[j]
cat("(",round(a,3),",",round(b,3),") ",sep="")
for(k in 1:3)
{
bootcde <- cde(x,bootsamples[,k],deg=0,link="identity",a=a,b=b,x.margin=x.margin,y.margin=y.margin)
diff.cde[k] <- sum((bootcde$z - truecde$z)^2)/(m*nx)
}
imse[i,j] <- mean(diff.cde[1:3])
}
}
cat("\n")
## Second pass near minimum
idx <- (imse==min(imse))
idx.a <- max(3,(1:na)[apply(idx,1,sum)==1]) + c(-2:2)
idx.b <- max(3,(1:nb)[apply(idx,2,sum)==1]) + c(-2:2)
idx.a <- idx.a[idx.a>0 & idx.a <=na]
idx.b <- idx.b[idx.b>0 & idx.b <=nb]
for(i in idx.a)
{
a <- a.grid[i]
for(j in idx.b)
{
b <- b.grid[j]
cat("(",round(a,3),",",round(b,3),") ",sep="")
for(k in 4:m)
{
bootcde <- cde(x,bootsamples[,k],deg=0,link="identity",a=a,b=b,x.margin=x.margin,y.margin=y.margin)
diff.cde[k] <- sum((bootcde$z - truecde$z)^2)/(m*nx)
}
imse[i,j] <- (3*imse[i,j] + (m-3)*mean(diff.cde[4:m]))/m
}
}
cat("\n")
# Find minimum on grid
idx <- (imse==min(imse))
idx.a <- (1:na)[apply(idx,1,sum)>0]
idx.b <- (1:nb)[apply(idx,2,sum)>0]
best.a <- a.grid[idx.a[1]]
best.b <- b.grid[idx.b[1]]
bestmse <- imse[idx.a[1],idx.b[1]]
## Fit quadratic surface to points near minimum and find minimum of surface
na <- length(idx.a)
nb <- length(idx.b)
if(na>2 & nb>2)
{
fit <- lm(imse ~ a + b + I(a^2) + I(b^2) + I(a*b),
data=data.frame(imse=c(imse[idx.a,idx.b]),a=rep(a.grid[idx.a],nb),b=rep(b.grid[idx.b],rep(na,nb))))
best.b <- (fit$coef[2]*fit$coef[6] - 2*fit$coef[3]*fit$coef[4])/(4*fit$coef[4]*fit$coef[5] - fit$coef[6]*fit$coef[6])
best.a <- (best.b*fit$coef[6] + fit$coef[2])/(-2*fit$coef[4])
}
else if(nb>2) # na=1 or 2
{
for(i in 1:na)
{
fit <- lm(imse ~ b+I(b^2),data=data.frame(imse=imse[idx.a[i],idx.b],b=b.grid[idx.b]))
bestb <- -0.5*fit$coef[2]/fit$coef[3]
minimse <- predict(fit,newdata=data.frame(b=bestb))
if(minimse<bestmse & bestb >= min(b.grid) & bestb <= max(b.grid))
{
best.b <- bestb
best.a <- a.grid[idx.a[i]]
bestmse <- minimse
}
}
}
else if(na>2) # nb=1 or 2
{
for(i in 1:nb)
{
fit <- lm(imse ~ a+I(a^2),data=data.frame(imse=imse[idx.a,idx.b[i]],a=a.grid[idx.a]))
besta <- -0.5*fit$coef[2]/fit$coef[3]
minimse <- predict(fit,newdata=data.frame(a=besta))
if(minimse<bestmse & besta >= min(a.grid) & besta <= max(a.grid))
{
best.a <- besta
best.b <- b.grid[idx.b[i]]
bestmse <- minimse
}
}
}
return(list(a=best.a,b=best.b,a.grid=a.grid,b.grid=b.grid,imse=imse*delta))
}
## This function called by cdeband.regress.
CDEband.regress <- function(x, y, a.grid, b, y.margin, penalty=4, tol=0.999)
{
# Calculate v
n <- length(x)
ny <- length(y.margin)
if(ny>1)
delta <- y.margin[2]-y.margin[1]
else
delta <- 1
v <- Kernel(y,y.margin,b)
na <- length(a.grid)
q <- numeric(na)
cat("\n Trying a=")
for(i in 1:na)
{
cat(round(a.grid[i],3)," ")
w <- Kernel(x,x,a.grid[i])
row.sum <- apply(w,1,sum)
w <- w / matrix(rep(row.sum,n),ncol=n)
junk <- (v - (t(w) %*% v))^2
diag.w <- diag(w)
pen <- switch(penalty,1+2*diag.w,1/(1-diag.w)^2,exp(2*diag.w),(1+diag.w)/(1-diag.w),1/(1-2*diag.w))
tmp <- apply(junk,1,sum)
q[i] <- mean(tmp*pen,na.rm=TRUE)
# if(sum(diag.w>tol)>0)
# q[i] <- Inf
# else
# {
# pen <- switch(penalty,1+2*diag.w,1/(1-diag.w)^2,exp(2*diag.w),(1+diag.w)/(1-diag.w),1/(1-2*diag.w))
# q[i] <- mean(apply(junk,1,sum)*pen)
# }
}
cat("\n")
idx <- (1:na)[q==min(q)]
if(idx==1 | idx==na)
{
# warning("No minimum found")
a <- NA
}
else
a <- a.grid[idx]
# browser()
return(list(a=a,a.grid=a.grid,q=q*delta))
}
cdeband.regress <- function(x,y,a.grid,b,ny=25,use.sample=FALSE,nx=100,y.margin,passes=2,usequad=TRUE,penalty=4,tol=0.999)
{
## Initialization
bands <- cdeband.rules0(x,y)
if(missing(a.grid))
a.grid <- bands$a*seq(0.01,3,l=10)
if(missing(b))
b <- bands$b
a.grid <- a.grid[a.grid>0]
na <- length(a.grid)
n <- length(x)
if(use.sample & n > nx)
idx <- sample(1:n,nx,replace=FALSE)
else
idx <- 1:n
xx <- x[idx]
yy <- y[idx]
if(missing(y.margin))
y.margin <- seq(min(y),max(y),l=ny)
## First pass
first <- CDEband.regress(xx,yy,a.grid,b,y.margin,penalty=penalty,tol=tol)
if(is.na(first$a)) # Expand grid
{
a.grid <- seq(a.grid[1]/20,1.1*a.grid[na],l=50)
firstb <- CDEband.regress(xx,yy,a.grid,b,y.margin,penalty=penalty,tol=tol)
first$a.grid <- c(first$a.grid,firstb$a.grid)
idx <- order(first$a.grid)
first$q <- c(first$q,firstb$q)[idx]
first$a.grid <- first$a.grid[idx]
first$a <- firstb$a
}
## Second pass
if(passes>1 & !is.na(first$a))
{
na <- length(first$a.grid)
idx <- c(1:na)[first$a.grid==first$a]
if(na>4)
idx <- idx + (-2:2)
else
idx <- idx + (-1:1)
idx <- idx[idx>=1 & idx<=na]
newa.grid <- seq(0.95*first$a.grid[idx[1]],1.05*first$a.grid[idx[length(idx)]],l=length(a.grid))
second <- CDEband.regress(xx,yy,newa.grid,b,y.margin,tol=tol,penalty=penalty)
fulla.grid <- c(first$a.grid,second$a.grid)
idx <- order(fulla.grid)
fullq <- c(first$q,second$q)[idx]
fulla.grid <- fulla.grid[idx]
a <- second$a
}
else
{
a <- first$a
fulla.grid <- first$a.grid
fullq <- first$q
}
## Quadratic solution
if(!is.na(a) & usequad)
{
na <- length(fulla.grid)
idx <- (c(1:na)[fulla.grid==a])[1]
if(na>4)
idx <- idx + (-2:2)
else
idx <- idx + (-1:1)
idx <- idx[idx>=1 & idx<=na & fullq[idx] != Inf]
if(length(idx)>3)
{
fit <- lm(q ~ a+I(a^2),data=data.frame(q=fullq[idx],a=fulla.grid[idx]))
besta <- -0.5*fit$coef[2]/fit$coef[3]
if(besta >0)
a <- besta
}
}
return(list(a=a,b=b,a.grid=fulla.grid,q=fullq))
}
"Lsquare" <- function(x,y,sdlinear=TRUE)
{
# Fits linear model.
# If sdlinear=TRUE, allows heteroskedastic errors.
x <- c(x)
y <- c(y)
model <- lm(y~x)
res <- residuals(model)
pc <- sqrt(sum(res^2)/(length(x)-2))
if(!sdlinear)
return(list(pc=pc,dx=model$coef[2]))
## Initial estimate of heteroskedasticity
model.res <- lm(abs(res)~x)
## Recalculate linear fit
weights <- fitted(model.res)^(-2)
model <- lm(y~x,weights=weights)
## Recalculate heteroskedasticity
res <- residuals(model)
f1 <- function(co,res,x)
{
sum((res * res - (co[1] + co[2] * x)^2)^2)
}
junk <- nlm(f1,model.res$coef,res=res,x=x)
return(list(pc=pc,pl=junk$estimate[1],qx=junk$estimate[2],dx=model$coef[2]))
}
⌨️ 快捷键说明
复制代码
Ctrl + C
搜索代码
Ctrl + F
全屏模式
F11
切换主题
Ctrl + Shift + D
显示快捷键
?
增大字号
Ctrl + =
减小字号
Ctrl + -