为此,您可以在链接比例上返回样条曲线的值(没有截距),然后对值进行取幂以获得可能性的比例
如果你正在使用 mgcv::gam() 然后你可以这样做:
mgcv::gam()
library('mgcv') set.seed(1) dat <- gamSim(1, dist = "binary") m1 <- gam(y ~ s(x2), data = dat, method = "REML", family = binomial()) pdat <- with(dat, data.frame(x2 = seq(min(x2), max(x2), length = 500))) pred <- predict(m1, newdata = pdat, se.fit = TRUE, type = "iterms") pred <- data.frame(x2 = pdat$x2, fit = pred$fit[,1], se.fit = pred$se.fit[,1]) ## compute CI on the logit (log-odds) scale pred <- transform(pred, upper = fit + (2 * se.fit), lower = fit - (2 * se.fit)) ## transform fitted values + CI to odds scale pred <- transform(pred, odds = exp(fit), oupper = exp(upper), olower = exp(lower)) ## plot library("ggplot2") library("cowplot") theme_set(theme_bw()) ## plot on the logit-scale p1 <- ggplot(pred, aes(x = x2, y = fit)) + geom_ribbon(aes(x= x2, ymin = lower, ymax = upper), inherit.aes = FALSE, alpha = 0.1) + geom_line() ## plot on the odds scale p2 <- ggplot(pred, aes(x = x2, y = odds)) + geom_ribbon(aes(x= x2, ymin = olower, ymax = oupper), inherit.aes = FALSE, alpha = 0.1) + geom_line() plot_grid(p1, p2, ncol = 1)
产生这个:
上面的面板只是你展示的情节的ggplot表示。下面板将其转换为赔率。
如果模型中有多个平滑,则需要稍微修改一下。这条线
pred <- data.frame(....)
将需要从中选择其他列 $fit 和 $se.fit 组件。
$fit
$se.fit
如果你不想自己完成这一切,那么一个快速的方法是捕获输出 plot(model)
plot(model)
m2 <- gam(y ~ s(x0) + s(x1) + s(x2) + s(x3), data = dat, method = "REML", family = binomial()) plt_data <- plot(m2, pages = 1, seWithMean = TRUE)
现在 plt_data 是一个包含每个平滑组件的列表。重建你做的情节 plot(m2) ,我们需要使用:
plt_data
plot(m2)
x
fit
se
我们将编写一个函数来添加置信区间并可能应用转换:
add_ci <- function(df, trans = function(eta) { eta }) { df <- transform(df, yhat = trans(fit), upper = trans(fit + (2 * se)), lower = trans(fit - (2 * se))) df }
并将其应用于中的每个数据对象 plt_data 列表:
p1dat <- add_ci(as.data.frame(plt_data[[1]][c('x','se','fit')])) p2dat <- add_ci(as.data.frame(plt_data[[2]][c('x','se','fit')])) p3dat <- add_ci(as.data.frame(plt_data[[3]][c('x','se','fit')])) p4dat <- add_ci(as.data.frame(plt_data[[4]][c('x','se','fit')]))
现在我们可以策划
p1 <- ggplot(data = p1dat, aes(x = x, y = yhat)) + geom_ribbon(aes(x = x, ymin = lower, ymax = upper), inherit.aes = FALSE, alpha = 0.2) + geom_line() + labs(y = 's(x0)', x = 'x0') p2 <- p1 %+% p2dat + labs(y = 's(x1)', x = 'x1') p3 <- p1 %+% p3dat + labs(y = 's(x2)', x = 'x2') p4 <- p1 %+% p4dat + labs(y = 's(x3)', x = 'x3') plot_grid(p1, p2, p3, p4, ncol = 2)
给
接下来我们可以应用转换
p1dat <- add_ci(as.data.frame(plt_data[[1]][c('x','se','fit')]), trans = exp) p2dat <- add_ci(as.data.frame(plt_data[[2]][c('x','se','fit')]), trans = exp) p3dat <- add_ci(as.data.frame(plt_data[[3]][c('x','se','fit')]), trans = exp) p4dat <- add_ci(as.data.frame(plt_data[[4]][c('x','se','fit')]), trans = exp) pt1 <- p1 %+% p1dat + labs(y = 's(x0)', x = 'x0') + coord_cartesian(ylim = c(0, 100)) pt2 <- p1 %+% p2dat + labs(y = 's(x1)', x = 'x1') + coord_cartesian(ylim = c(0, 4000)) pt3 <- p1 %+% p3dat + labs(y = 's(x2)', x = 'x2') + coord_cartesian(ylim = c(0, 250)) pt4 <- p1 %+% p4dat + labs(y = 's(x3)', x = 'x3') + coord_cartesian(ylim = c(0, 5)) plot_grid(pt1, pt2, pt3, pt4, ncol = 2)
哪个产生
正如您所看到的,当CI爆炸时,您需要大量调整轴限制。