ksvm.r
setGeneric("ksvm", function(x, ...) standardGeneric("ksvm"))setMethod("ksvm",signature(x="formula"),function (x, data=NULL, ..., subset, na.action = na.omit, scaled = TRUE){ call <- match.call() m <- match.call(expand.dots = FALSE) if (is.matrix(eval(m$data, parent.frame()))) m$data <- as.data.frame(data) m$... <- NULL m$formula <- m$x m$x <- NULL m$scaled <- NULL m[[1]] <- as.name("model.frame") m <- eval(m, parent.frame()) Terms <- attr(m, "terms") attr(Terms, "intercept") <- 0 x <- model.matrix(Terms, m) y <- model.extract(m, response) if (length(scaled) == 1) scaled <- rep(scaled, ncol(x)) if (any(scaled)) { remove <- unique(c(which(labels(Terms) %in% names(attr(x, "contrasts"))), which(!scaled) ) ) scaled <- !attr(x, "assign") %in% remove } ret <- ksvm(x, y, scaled = scaled, ...) kcall(ret) <- call kterms(ret) <- Terms if (!is.null(attr(m, "na.action"))) n.action(ret) <- attr(m, "na.action") return (ret)})setMethod("ksvm",signature(x="vector"),function(x,...) { x <- t(t(x)) ret <- ksvm(x, ...) return(ret) }) setMethod("ksvm",signature(x="matrix"),function (x, y = NULL, scaled = TRUE, type = NULL, kernel = "rbfdot", kpar = list(sigma = 0.1), C = 1, nu = 0.2, epsilon = 0.1, class.weights = NULL, cross = 0, fit = TRUE, cache = 40, tol = 0.001, shrinking = TRUE, ... ,subset ,na.action = na.omit){## subsetting and na-handling for matrices ret <- new("ksvm") if (!missing(subset)) x <- x[subset,] if (is.null(y)) x <- na.action(x) else { df <- na.action(data.frame(y, x)) y <- df[,1] x <- as.matrix(df[,-1]) } n.action(ret) <- na.action if (is.null(type)) type(ret) <- if (is.null(y)) "one-classification" else if (is.factor(y)) "C-classification" else "eps-regression" if(!is.null(type)) type(ret) <- match.arg(type,c("C-classification", "nu-classification", "spoc-classification", "kbb-classification", "one-classification", "eps-regression", "nu-regression")) unscaledx <- x x.scale <- y.scale <- NULL ## scaling if (length(scaled) == 1) scaled <- rep(scaled, ncol(x)) if (any(scaled)) { co <- !apply(x[,scaled, drop = FALSE], 2, var) if (any(co)) { scaled <- rep(FALSE, ncol(x)) warning(paste("Variable(s)", paste("`",colnames(x[,scaled, drop = FALSE])[co], "'", sep="", collapse=" and "), "constant. 
setMethod("ksvm", signature(x = "matrix"),
          function (x, y = NULL, scaled = TRUE, type = NULL,
                    kernel = "rbfdot", kpar = list(sigma = 0.1),
                    C = 1, nu = 0.2, epsilon = 0.1, class.weights = NULL,
                    cross = 0, fit = TRUE, cache = 40, tol = 0.001,
                    shrinking = TRUE, ..., subset, na.action = na.omit)
{
  ## subsetting and na-handling for matrices
  ret <- new("ksvm")
  if (!missing(subset)) x <- x[subset, ]
  if (is.null(y))
    x <- na.action(x)
  else {
    df <- na.action(data.frame(y, x))
    y <- df[, 1]
    x <- as.matrix(df[, -1])
  }
  n.action(ret) <- na.action

  if (is.null(type))
    type(ret) <- if (is.null(y)) "one-classification"
                 else if (is.factor(y)) "C-classification"
                 else "eps-regression"
  if (!is.null(type))
    type(ret) <- match.arg(type, c("C-classification", "nu-classification",
                                   "spoc-classification", "kbb-classification",
                                   "one-classification", "eps-regression",
                                   "nu-regression"))
  unscaledx <- x
  x.scale <- y.scale <- NULL

  ## scaling
  if (length(scaled) == 1)
    scaled <- rep(scaled, ncol(x))
  if (any(scaled)) {
    ## flag zero-variance columns, which cannot be scaled
    co <- !apply(x[, scaled, drop = FALSE], 2, var)
    if (any(co)) {
      ## warn while `scaled` still selects the offending columns,
      ## then disable scaling altogether
      warning(paste("Variable(s)",
                    paste("`", colnames(x[, scaled, drop = FALSE])[co], "'",
                          sep = "", collapse = " and "),
                    "constant. Cannot scale data."))
      scaled <- rep(FALSE, ncol(x))
    }
    else {
      xtmp <- scale(x[, scaled])
      x[, scaled] <- xtmp
      x.scale <- attributes(xtmp)[c("scaled:center", "scaled:scale")]
      if (is.numeric(y) && (type(ret) != "C-classification" &&
                            type(ret) != "nu-classification" &&
                            type(ret) != "spoc-classification")) {
        y <- scale(y)
        y.scale <- attributes(y)[c("scaled:center", "scaled:scale")]
        y <- as.vector(y)
      }
      scaling(ret) <- list(scaled = scaled, x.scale = x.scale, y.scale = y.scale)
    }
  }
  ncols <- ncol(x)
  m <- nrows <- nrow(x)

  if (is.character(kernel))
    kernel <- match.arg(kernel, c("rbfdot", "polydot", "tanhdot", "vanilladot"))

  if (!is.list(kpar) && is.character(kpar) &&
      (class(kernel) == "rbfkernel" || kernel == "rbfdot")) {
    kp <- match.arg(kpar, "automatic")
    if (kp == "automatic")
      kpar <- list(sigma = sum(sigest(x, scaled = FALSE))/2)
    cat("Using automatic sigma estimation (sigest) for RBF kernel", "\n")
  }
  if (!is(kernel, "kernel")) {
    if (is(kernel, "function")) kernel <- deparse(substitute(kernel))
    kernel <- do.call(kernel, kpar)
  }
  if (!is(kernel, "kernel"))
    stop("kernel must inherit from class `kernel'")

  ## internal function used by smo_optim: returns the kernel matrix columns
  ## for the rows indexed by `v` (`xd` is found in the enclosing frame)
  .kernelin <- function(v) {
    return(kernelMatrix(kernel, xd, xd[v, , drop = FALSE]))
  }

  if (!is.vector(y) && !is.factor(y) && !(type(ret) == "one-classification"))
    stop("y must be a vector or a factor.")
  if ((type(ret) != "one-classification") && nrows != length(y))
    stop("x and y don't match.")
  if (nu > 1 || nu < 0)
    stop("nu must be between 0 and 1.")

  weightlabels <- NULL
  nweights <- 0
  weight <- 0
  wl <- 0

  ## in case of classification: transform factors into integers
  if (type(ret) == "one-classification")  ## one-class classification --> set dummy y
    y <- 1
  else if (is.factor(y)) {
    lev(ret) <- levels(y)
    y <- as.integer(y)
    if (!is.null(class.weights)) {
      if (is.null(names(class.weights)))
        stop("Weights have to be specified along with their corresponding level names!")
      weightlabels <- match(names(class.weights), lev(ret))
      if (any(is.na(weightlabels)))
        stop("At least one level name is missing or misspelled.")
    }
  }
  else {
    if ((type(ret) == "C-classification" || type(ret) == "nu-classification" ||
         type(ret) == "spoc-classification" || type(ret) == "kbb-classification") &&
        any(as.integer(y) != y))
      stop("dependent variable has to be of factor or integer type for classification mode.")
    if (type(ret) != "eps-regression" && type(ret) != "nu-regression")
      lev(ret) <- unique(y)
  }

  ## initialize
  nclass(ret) <- length(lev(ret))
  p <- 0
  svindex <- problem <- NULL
  sigma <- 0.1
  degree <- offset <- scale <- 1

  ## map the kernel object onto the integer codes expected by the C code
  switch(is(kernel)[1],
         "rbfkernel" = {
           sigma <- kpar(kernel)$sigma
           ktype <- 2
         },
         "tanhkernel" = {
           sigma <- kpar(kernel)$scale
           offset <- kpar(kernel)$offset
           ktype <- 3
         },
         "polykernel" = {
           degree <- kpar(kernel)$degree
           sigma <- kpar(kernel)$scale
           offset <- kpar(kernel)$offset
           ktype <- 1
         },
         "vanillakernel" = {
           ktype <- 0
         },
         ktype <- 4
         )
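## ---------------------------------------------------------------------------
## Note (illustrative, not part of the original file): the classification
## branches below use a one-vs-one decomposition, i.e. one binary SVM per
## unordered pair of classes, choose(nclass, 2) subproblems in total.
## A minimal sketch of the pair enumeration the nested loops implement:
##
##   nclass <- 3
##   combn(nclass, 2)   # columns (1,2), (1,3), (2,3) -> 3 binary subproblems
## ---------------------------------------------------------------------------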
  if (type(ret) == "C-classification") {
    indexes <- lapply(1:nclass(ret), function(kk) which(y == kk))
    for (i in 1:(nclass(ret) - 1)) {
      jj <- i + 1
      for (j in jj:nclass(ret)) {
        p <- p + 1
        ## prepare data: first li rows from class i, remaining lj rows from class j
        li <- length(indexes[[i]])
        lj <- length(indexes[[j]])
        xd <- matrix(0, (li + lj), dim(x)[2])
        xdi <- 1:(li + lj) <= li
        xd[xdi, rep(TRUE, dim(x)[2])] <- x[indexes[[i]], ]
        xd[xdi == FALSE, rep(TRUE, dim(x)[2])] <- x[indexes[[j]], ]
        if (y[indexes[[i]][1]] < y[indexes[[j]]][1]) {
          yd <- c(rep(1, li), rep(-1, lj))
          if (!is.null(class.weights)) {
            weight <- weightlabels[c(i, j)]
            wl <- c(1, 0)
            nweights <- 2
          }
        }
        else {
          yd <- c(rep(-1, li), rep(1, lj))
          if (!is.null(class.weights)) {
            weight <- weightlabels[c(j, i)]
            wl <- c(0, 1)
            nweights <- 2
          }
        }
        resv <- .Call("smo_optim",
                      as.double(t(xd)),
                      as.integer(nrow(xd)),
                      as.integer(ncol(xd)),
                      as.double(yd),
                      as.double(matrix(rep(-1, m))), ## linear term
                      as.integer(ktype),
                      as.integer(0),
                      as.double(C),
                      as.double(nu),
                      as.double(epsilon),
                      as.double(sigma),
                      as.integer(degree),
                      as.double(offset),
                      as.integer(wl),                ## weightlabel
                      as.double(weight),
                      as.integer(nweights),
                      as.double(cache),
                      as.double(tol),
                      as.integer(shrinking),
                      .kernelin,
                      environment(.kernelin))
        alpha(ret)[p] <- list(resv[-(li + lj + 1)])
        ## nonzero alpha*y
        coeff(ret)[p] <- list(alpha(ret)[[p]][alpha(ret)[[p]] > 0] * yd[alpha(ret)[[p]] > 0])
        ## store SV indexes from current problem for later use in predict
        alphaindex(ret)[p] <- list(c(indexes[[i]], indexes[[j]])[which(resv[-(li + lj + 1)] > 0)])
        ## save the indexes of all the SVs in one vector (use unique?)
        svindex <- c(svindex, alphaindex(ret)[[p]])
        ## store betas in a vector
        b(ret) <- c(b(ret), resv[li + lj + 1])
        ## used to reconstruct indexes for the patterns matrix x from "indexes" (really useful?)
        problem[p] <- list(c(i, j))
        ## store C in return object
        param(ret)$C <- C
      }
    }
  }

  if (type(ret) == "nu-classification") {
    indexes <- lapply(1:nclass(ret), function(kk) which(y == kk))
    for (i in 1:(nclass(ret) - 1)) {
      jj <- i + 1
      for (j in jj:nclass(ret)) {
        p <- p + 1
        ## prepare data: first li rows from class i, remaining lj rows from class j
        li <- length(indexes[[i]])
        lj <- length(indexes[[j]])
        xd <- matrix(0, (li + lj), dim(x)[2])
        xdi <- 1:(li + lj) <= li
        xd[xdi, rep(TRUE, dim(x)[2])] <- x[indexes[[i]], ]
        xd[xdi == FALSE, rep(TRUE, dim(x)[2])] <- x[indexes[[j]], ]
        if (y[indexes[[i]][1]] < y[indexes[[j]]][1])
          yd <- c(rep(1, li), rep(-1, lj))
        else
          yd <- c(rep(-1, li), rep(1, lj))
        resv <- .Call("smo_optim",
                      as.double(t(xd)),
                      as.integer(nrow(xd)),
                      as.integer(ncol(xd)),
                      as.double(yd),
                      as.double(matrix(rep(-1, m))), ## linear term
                      as.integer(ktype),
                      as.integer(1),
                      as.double(C),
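## ---------------------------------------------------------------------------
## Note (illustrative, not part of the original file): for a subproblem with
## n = li + lj training points, `smo_optim` returns n alpha values followed by
## the offset b, which is how the unpacking in the C-classification branch
## reads it:
##
##   n     <- li + lj
##   alpha <- resv[-(n + 1)]   # one multiplier per training point
##   b     <- resv[n + 1]      # offset of the decision function
##
## At predict time the stored coeff/alphaindex/b per problem presumably feed a
## pairwise-voting scheme: each binary decision function casts a vote and the
## class collecting the most votes wins.
## ---------------------------------------------------------------------------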