## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, message = TRUE, warning = TRUE, fig.align = "center", fig.height = 6, fig.width = 7, fig.path = "fig/", dev = "png", comment = "#>" #, ) library(cv) library(lme4) # save some typing knitr::set_alias(w = "fig.width", h = "fig.height", cap = "fig.cap") # colorize text: use inline as `r colorize(text, color)` colorize <- function(x, color) { if (knitr::is_latex_output()) { sprintf("\\textcolor{%s}{%s}", color, x) } else if (knitr::is_html_output()) { sprintf("%s", color, x) } else x } .opts <- options(digits = 5) CACHE <- FALSE ## ----HSB-data----------------------------------------------------------------- data("MathAchieve", package = "nlme") dim(MathAchieve) head(MathAchieve, 3) tail(MathAchieve, 3) data("MathAchSchool", package = "nlme") dim(MathAchSchool) head(MathAchSchool, 2) tail(MathAchSchool, 2) ## ----HSB-data-management, cache=CACHE----------------------------------------- library("dplyr") MathAchieve %>% group_by(School) %>% summarize(mean.ses = mean(SES)) -> Temp Temp <- merge(MathAchSchool, Temp, by = "School") HSB <- merge(Temp[, c("School", "Sector", "mean.ses")], MathAchieve[, c("School", "SES", "MathAch")], by = "School") names(HSB) <- tolower(names(HSB)) HSB$cses <- with(HSB, ses - mean.ses) ## ----HSB-lmer, cache=CACHE---------------------------------------------------- library("lme4") hsb.lmer <- lmer(mathach ~ mean.ses * cses + sector * cses + (cses | school), data = HSB) summary(hsb.lmer, correlation = FALSE) ## ----HSB-lmer-CV-cluster, cache=CACHE----------------------------------------- library("cv") summary(cv(hsb.lmer, k = 10, clusterVariables = "school", seed = 5240)) ## ----HSB-lmer-CV-case, cache=CACHE-------------------------------------------- summary(cv(hsb.lmer, seed = 1575)) ## ----hsb-lme, cache=CACHE----------------------------------------------------- library("nlme") hsb.lme <- lme( mathach ~ mean.ses * cses + sector * cses, random = ~ cses | school, data = HSB, control = list(opt = "optim") ) summary(hsb.lme) summary(cv(hsb.lme, k = 10, clusterVariables = "school", seed = 5240)) summary(cv(hsb.lme, seed = 1575)) ## ----include=FALSE, echo=FALSE------------------------------------------------ library("glmmTMB") # necessary for some reason to knit vignette in RStudio, harmless otherwise ## ----parameters--------------------------------------------------------------- # Parameters: Nb <- 20 # number of patients Nw <- 5 # number of occasions for each patient Bb <- 1.0 # between-patient regression coefficient on patient means Bw <- -0.5 # within-patient effect of x SD_between <- c(0, 5, 6, 8) # SD between patients SD_within <- rep(2.5, length(SD_between)) # SD within patients Nv <- length(SD_within) # number of variance profiles SD_ratio <- paste0('SD ratio = ', SD_between,' / ',SD_within) SD_ratio <- factor(SD_ratio, levels = SD_ratio) set.seed(833885) Data_template <- expand.grid(patient = 1:Nb, obs = 1:Nw) |> within({ xw <- seq(-2, 2, length.out = Nw)[obs] x <- patient + xw xm <- ave(x, patient) # within-patient mean # Scaled random error within each SD_ratio_i group re_std <- scale(resid(lm(rnorm(Nb*Nw) ~ x))) re_between <- ave(re_std, patient) re_within <- re_std - re_between re_between <- scale(re_between)/sqrt(Nw) re_within <- scale(re_within) }) Data <- do.call( rbind, lapply( 1:Nv, function(i) { cbind(Data_template, SD_ratio_i = i) } ) ) Data <- within( Data, { SD_within_ <- SD_within[SD_ratio_i] SD_between_ <- SD_between[SD_ratio_i] SD_ratio <- SD_ratio[SD_ratio_i] y <- 10 + Bb * xm + # contextual effect Bw * (x - xm) + # within-patient effect SD_within_ * re_within + # within patient random effect SD_between_ * re_between # adjustment to between patient random effect } ) ## ----plot1-------------------------------------------------------------------- library("lattice") library("latticeExtra") plot <- xyplot(y ~ x | SD_ratio, data = Data, group = patient, layout = c(Nv, 1), par.settings = list(superpose.symbol = list(pch = 1, cex = 0.7))) + layer(panel.ellipse(..., center.pch = 16, center.cex = 1.5, level = 0.5), panel.abline(a = 10, b = 1)) plot # display graph ## ----model-fits--------------------------------------------------------------- model.formulas <- c( ' ~ 1' = y ~ 1 + (1 | patient), '~ 1 + x' = y ~ 1 + x + (1 | patient), '~ 1 + x + xm' = y ~ 1 + x + xm + (1 | patient), '~ 1 + I(x - xm)' = y ~ 1 + I(x - xm) + (1 | patient) ) fits <- lapply(split(Data, ~ SD_ratio), function(d) { lapply(model.formulas, function(form) { glmmTMB(form, data = d) }) }) ## ----predict------------------------------------------------------------------ # predicted fixed and random effects: pred.BLUPs <- lapply(fits, lapply, predict) # predicted fixed effects: pred.fixed <- lapply(fits, lapply, predict, re.form = ~0) ## ----data-predictions--------------------------------------------------------- Dataf <- lapply(split(Data, ~ SD_ratio), function(d) { lapply(names(model.formulas), function(form) cbind(d, formula = form)) }) |> lapply(function(dlist) do.call(rbind, dlist)) |> do.call(rbind, args = _) Dataf <- within( Dataf, { pred.fixed <- unlist(pred.fixed) pred.BLUPs <- unlist(pred.BLUPs) panel <- factor(formula, levels = c(names(model.formulas), 'data')) } ) Data$panel <- factor('data', levels = c(names(model.formulas), 'data')) ## ----plot-fits-fixed---------------------------------------------------------- { xyplot(y ~ x |SD_ratio * panel, Data, groups = patient, type = 'n', par.strip.text = list(cex = 0.7), drop.unused.levels = FALSE) + glayer(panel.ellipse(..., center.pch = 16, center.cex = 0.5, level = 0.5), panel.abline(a = 10, b = 1)) + xyplot(pred.fixed ~ x |SD_ratio * panel, Dataf, type = 'l', groups = patient, drop.unused.levels = F, ylab = 'fixed-effect predictions') }|> useOuterStrips() |> print() ## ----plot-fits-blups---------------------------------------------------------- { xyplot(y ~ x | SD_ratio * panel, Data, groups = patient, type = 'n', drop.unused.levels = F, par.strip.text = list(cex = 0.7), ylab = 'fixed- and random-effect predictions (BLUPS)') + glayer(panel.ellipse(..., center.pch = 16, center.cex = 0.5, level = 0.5), panel.abline(a = 10, b = 1)) + xyplot(pred.BLUPs ~ x | SD_ratio * panel, Dataf, type = 'l', groups = patient, drop.unused.levels = F) }|> useOuterStrips() |> print() ## ----echo=FALSE,include=FALSE------------------------------------------------- library(cv) # unclear why it's necessary to reload cv .opts <- options(warn = -2) ## ----cross-validation,cache=CACHE--------------------------------------------- model_lists <- lapply(fits, function(fitlist) do.call(models, fitlist)) cvs_cases <- lapply(1:Nv, function(i){ cv(model_lists[[i]], k = 10, data = split(Data, ~ SD_ratio)[[i]]) }) cvs_clusters <- lapply(1:Nv, function(i){ cv(model_lists[[i]], k = 10, data = split(Data, ~SD_ratio)[[i]], clusterVariables = 'patient') }) ## ----plot-cv-example---------------------------------------------------------- plot(cvs_clusters[[2]], main="Comparison of Fixed Effects") ## ----cross-validation-data---------------------------------------------------- names(cvs_clusters) <- names(cvs_cases) <- SD_ratio dsummary <- expand.grid(SD_ratio_i = names(cvs_cases), model = names(cvs_cases[[1]])) dsummary$cases <- sapply(1:nrow(dsummary), function(i){ with(dsummary[i,], cvs_cases[[SD_ratio_i]][[model]][['CV crit']]) }) dsummary$clusters <- sapply(1:nrow(dsummary), function(i){ with(dsummary[i,], cvs_clusters[[SD_ratio_i]][[model]][['CV crit']]) }) ## ----cross-validation-data-plot----------------------------------------------- xyplot(cases + clusters ~ model|SD_ratio_i, dsummary, auto.key = list(space = 'top', reverse.rows = T, columns = 2), type = 'b', xlab = "Fixed Effects", ylab = 'CV criterion (MSE)', layout= c(Nv,1), par.settings = list(superpose.line=list(lty = c(2, 3), lwd = 3), superpose.symbol=list(pch = 15:16, cex = 1.5)), scales = list(y = list(log = TRUE), x = list(alternating = F, rot = 60))) |> print() ## ----pigs--------------------------------------------------------------------- head(Pigs, 9) head(xtabs( ~ id + week, data = Pigs), 3) tail(xtabs( ~ id + week, data = Pigs), 3) ## ----pigs-graph--------------------------------------------------------------- plot(weight ~ week, data = Pigs, type = "n") for (i in unique(Pigs$id)) { with(Pigs, lines( x = 1:9, y = Pigs[id == i, "weight"], col = "gray" )) } abline(lm(weight ~ week, data = Pigs), col = "blue", lwd = 2) lines( with(Pigs, loess.smooth(week, weight, span = 0.5)), col = "magenta", lty = 2, lwd = 2 ) ## ----pigs-lmer---------------------------------------------------------------- m.p <- lmer( weight ~ week + (1 | id) + (1 | week), data = Pigs, REML = FALSE, # i.e., ML control = lmerControl(optimizer = "bobyqa") ) summary(m.p) ## ----pigs-cv------------------------------------------------------------------ summary(cv(m.p, clusterVariables = "id")) summary(cv(m.p, clusterVariables = "week")) summary(cv( m.p, clusterVariables = c("id", "week"), k = 10, seed = 8469)) ## ----pigs-cv-cases------------------------------------------------------------ summary(cv(m.p, k = 10, seed = 8469)) ## ----coda, include = FALSE---------------------------------------------------- options(.opts)