📄 mcmcmnl.r

📁 使用R语言的马尔科夫链蒙特卡洛模拟（MCMC）源代码程序。
💻 R
字号:
## MCMCmnl.R samples from the posterior distribution of a multinomial## logit model using Metropolis-Hastings.#### KQ 12/22/2004## parse formula and return a list that contains the model response## matrix as element one, the model matrix as element two,## the column names of X as element three, the rownames of## X and y as element four, and the number of choices in the largest## choice set in element five"parse.formula.mnl" <- function(formula, data, baseline=NULL,                                intercept=TRUE){    ## extract Y, X, and variable names for model formula and frame  mt <- terms(formula, data=data)  if(missing(data)) data <- sys.frame(sys.parent())  mf <- match.call(expand.dots = FALSE)  mf$intercept <- mf$baseline <- NULL  mf$drop.unused.levels <- TRUE  mf[[1]] <- as.name("model.frame")    mf <- eval(mf, sys.frame(sys.parent()))  if (!intercept){    attributes(mt)$intercept <- 0  }      ## deal with Y  Y <- as.matrix(model.response(mf, "numeric")) # Y matrix  if (ncol(Y)==1){    Y <- factor(Y)    number.choices <- length(unique(Y))    choice.names <- sort(unique(Y))    Ymat <- matrix(NA, length(Y), number.choices)    colnames(Ymat) <- choice.names    for (i in 1:(number.choices)){      Ymat[,i] <- as.numeric(Y==choice.names[i])              }  }  else{    ## this block will allow for nonconstant choice sets    number.choices <- ncol(Y)    Ytemp <- Y    Ytemp[Y== -999] <- NA    if ( min(unique(array(Y)) %in% c(-999,0,1))==0 ||        min(apply(Ytemp, 1, sum, na.rm=TRUE) == rep(1, nrow(Y)))==0){      stop("Y is a matrix but it is not composed of 0/1/-999 values\n and/or rows do not sum to 1\n")    }    Ymat <- Y    choice.names <- colnames(Y)  }  colnames(Ymat) <- choice.names  rownames(Ymat) <- 1:nrow(Ymat)    #Y.long <- matrix(t(Ymat), length(Ymat), 1)  #colnames(Y.long) <- "Y"  #rownames(Y.long) <- rep(1:nrow(Ymat), rep(length(choice.names), nrow(Ymat)))  #rownames(Y.long) <- paste(rownames(Y.long), choice.names, sep=".")  #group.id <- rep(1:nrow(Ymat), rep(ncol(Ymat), nrow(Ymat)))    ## deal with X  ## null model support  X <- if (!is.empty.model(mt)) model.matrix(mt, mf, contrasts)  X <- as.matrix(X)         # X matrix  xvars <- dimnames(X)[[2]] # X variable names  xobs  <- dimnames(X)[[1]] # X observation names    if (is.null(baseline)){    baseline <- choice.names[1]  }  if (! baseline %in% choice.names){    stop("'baseline' not consistent with observed choice levels in y\n")  }    ## deal with choice specific covariates  choicevar.indic <- rep(FALSE, length(xvars)) # indicators for choice                                               # specific variables  choicevar.indic[grep("^choicevar\\(", xvars)] <- TRUE  if (sum(choicevar.indic) > 0){    cvarname1.vec <- rep(NA, sum(choicevar.indic))    cvarname2.vec <- rep(NA, sum(choicevar.indic))    counter <- 0    for (i in 1:length(xvars)){      if (choicevar.indic[i]){        counter <- counter + 1        vn2 <- strsplit(xvars[i], ",")        vn3 <- strsplit(vn2[[1]], "\\(")        vn4 <- strsplit(vn3[[3]], "=")        cvarname1 <- vn3[[2]][1]        cvarname1 <- strsplit(cvarname1, "\"")[[1]]        cvarname1 <- cvarname1[length(cvarname1)]        cvarname2 <- vn4[[1]][length(vn4[[1]])]        cvarname2 <- strsplit(cvarname2, "\"")[[1]][2]        if (! cvarname2 %in% choice.names){          stop("choicelevel that was set in choicevar() not consistent with\n observed choice levels in y")        }        cvarname1.vec[counter] <- cvarname1        cvarname2.vec[counter] <- cvarname2        xvars[i] <- paste(cvarname1, cvarname2, sep=".")      }    }        X.cho <- X[, choicevar.indic]    X.cho.mat <- matrix(NA, length(choice.names)*nrow(X),                        length(unique(cvarname1.vec)))    rownames(X.cho.mat) <- rep(rownames(X), rep(length(choice.names), nrow(X)))    rownames(X.cho.mat) <- paste(rownames(X.cho.mat), choice.names, sep=".")    colnames(X.cho.mat) <- unique(cvarname1.vec)    choice.names.n <- rep(choice.names, nrow(X))    for (j in 1:length(unique(cvarname1.vec))){      for (i in 1:length(cvarname2.vec)){        if (colnames(X.cho.mat)[j] == cvarname1.vec[i]){          X.cho.mat[choice.names.n==cvarname2.vec[i], j] <-            X.cho[,i]        }      }    }  }    ## deal with individual specific covariates  xvars.ind.mat <- rep(xvars[!choicevar.indic],                       rep(length(choice.names),                           sum(!choicevar.indic)))  xvars.ind.mat <- paste(xvars.ind.mat, choice.names, sep=".")  if (sum(!choicevar.indic) > 0){    X.ind.mat <- X[,!choicevar.indic] %x% diag(length(choice.names))    colnames(X.ind.mat) <- xvars.ind.mat    rownames(X.ind.mat) <- rep(rownames(X), rep(length(choice.names), nrow(X)))    rownames(X.ind.mat) <- paste(rownames(X.ind.mat), choice.names, sep=".")        ## delete columns correpsonding to the baseline choice    ivarname1 <- strsplit(xvars.ind.mat, "\\.")    ivar.keep.indic <- rep(NA, ncol(X.ind.mat))    for (i in 1:ncol(X.ind.mat)){      ivar.keep.indic[i] <- ivarname1[[i]][length(ivarname1[[i]])] != baseline    }    X.ind.mat <- X.ind.mat[,ivar.keep.indic]  }    if (sum(choicevar.indic) > 0 & sum(!choicevar.indic) > 0){    X <- cbind(X.cho.mat, X.ind.mat)  }  else if (sum(!choicevar.indic) > 0){    X <- X.ind.mat  }  else if (sum(choicevar.indic) > 0){    X <- X.cho.mat  }  else {    stop("X matrix appears to have neither choice-specific nor individual-specific variables.\n")  }  #Y <- Y.long  xvars <- colnames(X)  xobs <- rownames(X)    return(list(Ymat, X, xvars, xobs, number.choices))  }## dummy function used to handle choice-specific covariates"choicevar" <- function(var, varname, choicelevel){  junk1 <- varname  junk2 <- choicelevel  return(var)}## MNL log-posterior function (used to get starting values)## vector Y without NAs"mnl.logpost.noNA" <- function(beta, new.Y, X, b0, B0){  nobs <- length(new.Y)  ncat <- nrow(X) / nobs  Xb <- X %*% beta  Xb <- matrix(Xb, byrow=TRUE, ncol=ncat)  indices <- cbind(1:nobs, new.Y)  Xb.reform <- Xb[indices]  eXb <- exp(Xb)  #denom <- log(apply(eXb, 1, sum))  z <- rep(1, ncat)  denom <- log(eXb %*% z)  log.prior <- 0.5 * t(beta - b0) %*% B0 %*% (beta - b0)    return(sum(Xb.reform - denom) + log.prior)}## MNL log-posterior function (used to get starting values)## matrix Y with NAs"mnl.logpost.NA" <- function(beta, Y, X, b0, B0){    k <- ncol(X)    numer <- exp(X %*% beta)    numer[Y== -999] <- NA    numer.mat <- matrix(numer, nrow(Y), ncol(Y), byrow=TRUE)    denom <- apply(numer.mat, 1, sum, na.rm=TRUE)    choice.probs <- numer.mat / denom    Yna <- Y    Yna[Y == -999] <- NA      log.like.mat <- log(choice.probs) * Yna    log.like <- sum(apply(log.like.mat, 1, sum, na.rm=TRUE))    log.prior <- 0.5 * t(beta - b0) %*% B0 %*% (beta - b0)    return(log.like + log.prior)}"MCMCmnl" <-  function(formula, baseline=NULL, data = parent.frame(),            burnin = 1000, mcmc = 10000, thin=1,           mcmc.method = c("IndMH", "RWM", "slice"),           tune = 1.0, tdf=6, verbose = 0, seed = NA,           beta.start = NA, b0 = 0, B0 = 0, ...) {    ## checks    check.offset(list(...))    check.mcmc.parameters(burnin, mcmc, thin)    if (tdf <= 0){      stop("degrees of freedom for multivariate-t proposal must be positive.\n Respecify tdf and try again.\n")     }        ## seeds    seeds <- form.seeds(seed)     lecuyer <- seeds[[1]]    seed.array <- seeds[[2]]    lecuyer.stream <- seeds[[3]]        ## form response and model matrix    holder <- parse.formula.mnl(formula=formula, baseline=baseline,                                data=data)    Y <- holder[[1]]    ## check to make sure baseline category is always available in choiceset    if (is.null(baseline)){      if (max(Y[,1] == -999) == 1){        stop("Baseline choice not available in all choicesets.\n Respecify baseline category and try again.\n")      }    }    else{      if (max(Y[,baseline] == -999) == 1){        stop("Baseline choice not available in all choicesets.\n Respecify baseline category and try again.\n")      }          }    X <- holder[[2]]    xnames <- holder[[3]]    xobs <- holder[[4]]    number.choices <- holder[[5]]    K <- ncol(X)  # number of covariates        ## form the tuning parameter    tune <- vector.tune(tune, K)    ## priors and starting values     mvn.prior <- form.mvn.prior(b0, B0, K)    b0 <- mvn.prior[[1]]    B0 <- mvn.prior[[2]]    beta.init <- rep(0, K)    cat("Calculating MLEs and large sample var-cov matrix.\n")    cat("This may take a moment...\n")    if (max(is.na(Y))){      optim.out <- optim(beta.init, mnl.logpost.NA, method="BFGS",                         control=list(fnscale=-1),                         hessian=TRUE, Y=Y, X=X, b0=b0, B0=B0)    }    else{      new.Y <- apply(Y==1, 1, which)      optim.out <- optim(beta.init, mnl.logpost.noNA, method="BFGS",                         control=list(fnscale=-1),                         hessian=TRUE, new.Y=new.Y, X=X, b0=b0, B0=B0)          }    cat("Inverting Hessian to get large sample var-cov matrix.\n")    ##V <- solve(-1*optim.out$hessian)    V <- chol2inv(chol(-1*optim.out$hessian))    beta.mode <- matrix(optim.out$par, K, 1)            if (is.na(beta.start) || is.null(beta.start)){      beta.start <- matrix(optim.out$par, K, 1)    }    else if(is.null(dim(beta.start))) {      beta.start <- matrix(beta.start, K, 1)    }    else if (length(beta.start != K)){      stop("beta.start not of appropriate dimension\n")    }          ## define holder for posterior sample    sample <- matrix(data=0, mcmc/thin, dim(X)[2] )    posterior <- NULL        if (mcmc.method=="RWM"){      ## call C++ code to draw sample      auto.Scythe.call(output.object="posterior", cc.fun.name="MCMCmnlMH",                       sample.nonconst=sample, Y=Y, X=X,                       burnin=as.integer(burnin),                       mcmc=as.integer(mcmc), thin=as.integer(thin),                       tune=tune, lecuyer=as.integer(lecuyer),                       seedarray=as.integer(seed.array),                       lecuyerstream=as.integer(lecuyer.stream),                       verbose=as.integer(verbose),                       betastart=beta.start, betamode=beta.mode,                       b0=b0, B0=B0,                       V=V, RW=as.integer(1), tdf=as.double(tdf))             ## put together matrix and build MCMC object to return      output <- form.mcmc.object(posterior, names=xnames,                                 title="MCMCmnl Posterior Sample")    }    else if (mcmc.method=="IndMH"){      auto.Scythe.call(output.object="posterior", cc.fun.name="MCMCmnlMH",                       sample.nonconst=sample, Y=Y, X=X,                       burnin=as.integer(burnin),                       mcmc=as.integer(mcmc), thin=as.integer(thin),                       tune=tune, lecuyer=as.integer(lecuyer),                       seedarray=as.integer(seed.array),                       lecuyerstream=as.integer(lecuyer.stream),                       verbose=as.integer(verbose),                       betastart=beta.start, betamode=beta.mode,                       b0=b0, B0=B0,                       V=V, RW=as.integer(0), tdf=as.double(tdf))       ## put together matrix and build MCMC object to return      output <- form.mcmc.object(posterior, names=xnames,                                 title="MCMCmnl Posterior Sample")          }    else if (mcmc.method=="slice"){      ## call C++ code to draw sample      auto.Scythe.call(output.object="posterior", cc.fun.name="MCMCmnlslice",                       sample.nonconst=sample, Y=Y, X=X,                        burnin=as.integer(burnin),                       mcmc=as.integer(mcmc), thin=as.integer(thin),                       lecuyer=as.integer(lecuyer),                       seedarray=as.integer(seed.array),                       lecuyerstream=as.integer(lecuyer.stream),                       verbose=as.integer(verbose), betastart=beta.start,                       b0=b0, B0=B0, V=V)             ## put together matrix and build MCMC object to return      output <- form.mcmc.object(posterior, names=xnames,                                 title="MCMCmnl Posterior Sample")    }    return(output)      }
⌨️ 快捷键说明

复制代码 Ctrl + C
搜索代码 Ctrl + F
全屏模式 F11
切换主题 Ctrl + Shift + D
显示快捷键 ?
增大字号 Ctrl + =
减小字号 Ctrl + -