\documentclass{chapman} %%% copy Sweave.sty definitions %%% keeps `sweave' from adding `\usepackage{Sweave}': DO NOT REMOVE %\usepackage{Sweave} \RequirePackage[T1]{fontenc} \RequirePackage{graphicx,ae,fancyvrb} \IfFileExists{upquote.sty}{\RequirePackage{upquote}}{} \usepackage{relsize} \DefineVerbatimEnvironment{Sinput}{Verbatim}{} \DefineVerbatimEnvironment{Soutput}{Verbatim}{fontfamily=courier, fontshape=it, fontsize=\relsize{-1}} \DefineVerbatimEnvironment{Scode}{Verbatim}{} \newenvironment{Schunk}{}{} %%% environment for raw output \newcommand{\SchunkRaw}{\renewenvironment{Schunk}{}{} \DefineVerbatimEnvironment{Soutput}{Verbatim}{fontfamily=courier, fontshape=it, fontsize=\small} \rawSinput } %%% environment for labeled output \newcommand{\nextcaption}{} \newcommand{\SchunkLabel}{ \renewenvironment{Schunk}{\begin{figure}[ht] }{\caption{\nextcaption} \end{figure} } \DefineVerbatimEnvironment{Sinput}{Verbatim}{frame = topline} \DefineVerbatimEnvironment{Soutput}{Verbatim}{frame = bottomline, samepage = true, fontfamily=courier, fontshape=it, fontsize=\relsize{-1}} } %%% S code with line numbers \DefineVerbatimEnvironment{Sinput} {Verbatim} { %% numbers=left } \newcommand{\numberSinput}{ \DefineVerbatimEnvironment{Sinput}{Verbatim}{numbers=left} } \newcommand{\rawSinput}{ \DefineVerbatimEnvironment{Sinput}{Verbatim}{} } %%% R / System symbols \newcommand{\R}{\textsf{R}} \newcommand{\rR}{{R}} \renewcommand{\S}{\textsf{S}} \newcommand{\SPLUS}{\textsf{S-PLUS}} \newcommand{\rSPLUS}{{S-PLUS}} \newcommand{\SPSS}{\textsf{SPSS}} \newcommand{\EXCEL}{\textsf{Excel}} \newcommand{\ACCESS}{\textsf{Access}} \newcommand{\SQL}{\textsf{SQL}} %%\newcommand{\Rpackage}[1]{\hbox{\rm\textit{#1}}} %%\newcommand{\Robject}[1]{\hbox{\rm\texttt{#1}}} %%\newcommand{\Rclass}[1]{\hbox{\rm\textit{#1}}} %%\newcommand{\Rcmd}[1]{\hbox{\rm\texttt{#1}}} \newcommand{\Rpackage}[1]{\index{#1 package@{\fontseries{b}\selectfont #1} package} {\fontseries{b}\selectfont #1}} \newcommand{\rpackage}[1]{{\fontseries{b}\selectfont #1}} \newcommand{\Robject}[1]{\texttt{#1}} \newcommand{\Rclass}[1]{\index{#1 class@\textit{#1} class}\textit{#1}} \newcommand{\Rcmd}[1]{\index{#1 function@\texttt{#1} function}\texttt{#1}} \newcommand{\Roperator}[1]{\texttt{#1}} \newcommand{\Rarg}[1]{\texttt{#1}} \newcommand{\Rlevel}[1]{\texttt{#1}} %%% other symbols \newcommand{\file}[1]{\hbox{\rm\texttt{#1}}} %%\newcommand{\stress}[1]{\index{#1}\textit{#1}} \newcommand{\stress}[1]{\textit{#1}} \newcommand{\booktitle}[1]{\textit{#1}} %%' %%% Math symbols \usepackage{amstext} \usepackage{amsmath} \newcommand{\E}{\mathsf{E}} \newcommand{\Var}{\mathsf{Var}} \newcommand{\Cov}{\mathsf{Cov}} \newcommand{\Cor}{\mathsf{Cor}} \newcommand{\x}{\mathbf{x}} \newcommand{\y}{\mathbf{y}} \renewcommand{\a}{\mathbf{a}} \newcommand{\W}{\mathbf{W}} \newcommand{\C}{\mathbf{C}} \renewcommand{\H}{\mathbf{H}} \newcommand{\X}{\mathbf{X}} \newcommand{\B}{\mathbf{B}} \newcommand{\V}{\mathbf{V}} \newcommand{\I}{\mathbf{I}} \newcommand{\D}{\mathbf{D}} \newcommand{\bS}{\mathbf{S}} \newcommand{\N}{\mathcal{N}} \renewcommand{\L}{L} \renewcommand{\P}{\mathsf{P}} \newcommand{\K}{\mathbf{K}} \newcommand{\m}{\mathbf{m}} \newcommand{\argmin}{\operatorname{argmin}\displaylimits} \newcommand{\argmax}{\operatorname{argmax}\displaylimits} \newcommand{\bx}{\mathbf{x}} \newcommand{\bbeta}{\mathbf{\beta}} %%% links \usepackage{hyperref} \hypersetup{% pdftitle = {A Handbook of Statistical Analyses Using R (3rd Edition)}, pdfsubject = {Book}, pdfauthor = {Torsten Hothorn and Brian S. Everitt}, colorlinks = {black}, linkcolor = {black}, citecolor = {black}, urlcolor = {black}, hyperindex = {true}, linktocpage = {true}, } %%% captions & tables %% : conflics with figure definition in chapman.cls %%\usepackage[format=hang,margin=10pt,labelfont=bf]{caption} %% \usepackage{longtable} \usepackage[figuresright]{rotating} %%% R symbol in chapter 1 \usepackage{wrapfig} %%% Bibliography \usepackage[round,comma]{natbib} \renewcommand{\refname}{References \addcontentsline{toc}{chapter}{References}} \citeindexfalse %%% texi2dvi complains that \newblock is undefined, hm... \def\newblock{\hskip .11em plus .33em minus .07em} %%% Example sections \newcounter{exercise}[chapter] \setcounter{exercise}{0} \newcommand{\exercise}{\stepcounter{exercise} \item{Ex.~\arabic{chapter}.\arabic{exercise} }} %% URLs \newcommand{\curl}[1]{\begin{center} \url{#1} \end{center}} %%% for manual corrections %\renewcommand{\baselinestretch}{2} %%% plot sizes \setkeys{Gin}{width=0.95\textwidth} %%% color \usepackage{color} %%% hyphenations \hyphenation{drop-out} \hyphenation{mar-gi-nal} %%% new bidirectional quotes need \usepackage[utf8]{inputenc} %\usepackage{setspace} \definecolor{sidebox_todo}{rgb}{1,1,0.2} \newcommand{\todo}[1]{ \hspace{0pt}% \marginpar{% \fcolorbox{black}{sidebox_todo}{% \parbox{\marginparwidth} { \raggedright\sffamily\footnotesize{TODO: #1}% } }% } } \begin{document} %% Title page \title{A Handbook of Statistical Analyses Using \R{} --- 3rd Edition} \author{Torsten Hothorn and Brian S. Everitt} \maketitle %%\VignetteIndexEntry{Chapter Analyzing Longitudinal Data II} %%\VignetteDepends{gee,lme4} \setcounter{chapter}{13} \SweaveOpts{prefix.string=figures/HSAUR,eps=FALSE,keep.source=TRUE} <>= rm(list = ls()) s <- search()[-1] s <- s[-match(c("package:base", "package:stats", "package:graphics", "package:grDevices", "package:utils", "package:datasets", "package:methods", "Autoloads"), s)] if (length(s) > 0) sapply(s, detach, character.only = TRUE) if (!file.exists("tables")) dir.create("tables") if (!file.exists("figures")) dir.create("figures") set.seed(290875) options(prompt = "R> ", continue = "+ ", width = 63, # digits = 4, show.signif.stars = FALSE, SweaveHooks = list(leftpar = function() par(mai = par("mai") * c(1, 1.05, 1, 1)), bigleftpar = function() par(mai = par("mai") * c(1, 1.7, 1, 1)))) HSAURpkg <- require("HSAUR3") if (!HSAURpkg) stop("cannot load package ", sQuote("HSAUR3")) rm(HSAURpkg) ### hm, R-2.4.0 --vanilla seems to need this a <- Sys.setlocale("LC_ALL", "C") ### book <- TRUE refs <- cbind(c("AItR", "DAGD", "SI", "CI", "ANOVA", "MLR", "GLM", "DE", "RP", "GAM", "SA", "ALDI", "ALDII", "SIMC", "MA", "PCA", "MDS", "CA"), 1:18) ch <- function(x) { ch <- refs[which(refs[,1] == x),] if (book) { return(paste("Chapter~\\\\ref{", ch[1], "}", sep = "")) } else { return(paste("Chapter~", ch[2], sep = "")) } } if (file.exists("deparse.R")) source("deparse.R") setHook(packageEvent("lattice", "attach"), function(...) { lattice.options(default.theme = function() standard.theme("pdf", color = FALSE)) }) @ \pagestyle{headings} <>= book <- FALSE @ <>= options(digits = 3) if (!interactive()) { print.summary.gee <- function (x, digits = NULL, quote = FALSE, prefix = "", ...) { if (is.null(digits)) digits <- options()$digits else options(digits = digits) cat("...") cat("\nModel:\n") cat(" Link: ", x$model$link, "\n") cat(" Variance to Mean Relation:", x$model$varfun, "\n") if (!is.null(x$model$M)) cat(" Correlation Structure: ", x$model$corstr, ", M =", x$model$M, "\n") else cat(" Correlation Structure: ", x$model$corstr, "\n") cat("\n...") nas <- x$nas if (!is.null(nas) && any(nas)) cat("\n\nCoefficients: (", sum(nas), " not defined because of singularities)\n", sep = "") else cat("\n\nCoefficients:\n") print(x$coefficients, digits = digits) cat("\nEstimated Scale Parameter: ", format(round(x$scale, digits))) cat("\n...\n") invisible(x) } } @ \chapter[Analyzing Longitudinal Data II]{ Analyzing Longitudinal Data II -- Generalized Estimation Equations and Linear Mixed Effect Models: Treating Respiratory Illness and Epileptic Seizures \label{ALDII}} \section{Introduction} \section{Methods for Non-normal Distributions} \section{Analysis Using \R{}: GEE} \subsection{Beat the Blues Revisited} To use the \Rcmd{gee} function, package \Rpackage{gee} \citep{PKG:gee} has to be installed and attached: <>= library("gee") @ The \Rcmd{gee} function is used in a similar way to the \Rcmd{lme} function met in \Sexpr{ch("ALDI")} with the addition of the features of the \Rcmd{glm} function that specify the appropriate error distribution for the response and the implied link function, and an argument to specify the structure of the working correlation matrix. Here we will fit an independence structure and then an exchangeable structure. The \R{} code for fitting generalized estimation equations to the \Robject{BtheB\_long} data (as constructed in \Sexpr{ch("ALDI")}) with identity working correlation matrix is as follows (note that the \Rcmd{gee} function assumes the rows of the \Rclass{data.frame} \Robject{BtheB\_long} to be ordered with respect to subjects): <>= data("BtheB", package = "HSAUR3") BtheB$subject <- factor(rownames(BtheB)) nobs <- nrow(BtheB) BtheB_long <- reshape(BtheB, idvar = "subject", varying = c("bdi.2m", "bdi.3m", "bdi.5m", "bdi.8m"), direction = "long") BtheB_long$time <- rep(c(2, 3, 5, 8), rep(nobs, 4)) names(BtheB_long)[names(BtheB_long) == "treatment"] <- "trt" @ <>= osub <- order(as.integer(BtheB_long$subject)) BtheB_long <- BtheB_long[osub,] btb_gee <- gee(bdi ~ bdi.pre + trt + length + drug, data = BtheB_long, id = subject, family = gaussian, corstr = "independence") @ and with exchangeable correlation matrix: <>= btb_gee1 <- gee(bdi ~ bdi.pre + trt + length + drug, data = BtheB_long, id = subject, family = gaussian, corstr = "exchangeable") @ The \Rcmd{summary} method can be used to inspect the fitted models; the results are shown in Figures~\ref{ALDII-gee-summary} and \ref{ALDII-gee1-summary}. \renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method for the \Robject{btb\_gee} model (slightly abbreviated). \label{ALDII-gee-summary}} \SchunkLabel <>= summary(btb_gee) @ \SchunkRaw \renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method for the \Robject{btb\_gee1} model (slightly abbreviated). \label{ALDII-gee1-summary}} \SchunkLabel <>= summary(btb_gee1) @ \SchunkRaw \subsection{Respiratory Illness \label{ALDII:resp}} The baseline status, i.e., the status for \Robject{month == 0}, will enter the models as an explanatory variable and thus we have to rearrange the \Rclass{data.frame} \Robject{respiratory} in order to create a new variable \Robject{baseline}: <>= data("respiratory", package = "HSAUR3") resp <- subset(respiratory, month > "0") resp$baseline <- rep(subset(respiratory, month == "0")$status, rep(4, 111)) resp$nstat <- as.numeric(resp$status == "good") resp$month <- resp$month[, drop = TRUE] @ <>= names(resp)[names(resp) == "treatment"] <- "trt" levels(resp$trt)[2] <- "trt" @ The new variable \Robject{nstat} is simply a dummy coding for a poor respiratory status. Now we can use the data \Robject{resp} to fit a logistic regression model and GEE models with an independent and an exchangeable correlation structure as follows. <>= resp_glm <- glm(status ~ centre + trt + gender + baseline + age, data = resp, family = "binomial") resp_gee1 <- gee(nstat ~ centre + trt + gender + baseline + age, data = resp, family = "binomial", id = subject, corstr = "independence", scale.fix = TRUE, scale.value = 1) resp_gee2 <- gee(nstat ~ centre + trt + gender + baseline + age, data = resp, family = "binomial", id = subject, corstr = "exchangeable", scale.fix = TRUE, scale.value = 1) @ \renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method for the \Robject{resp\_glm} model. \label{ALDII-resp-glm-summary}} \SchunkLabel <>= summary(resp_glm) @ \SchunkRaw \renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method for the \Robject{resp\_gee1} model (slightly abbreviated). \label{ALDII-resp-gee1-summary}} \SchunkLabel <>= summary(resp_gee1) @ \SchunkRaw \renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method for the \Robject{resp\_gee2} model (slightly abbreviated). \label{ALDII-resp-gee2-summary}} \SchunkLabel <>= summary(resp_gee2) @ \SchunkRaw The estimated treatment effect taken from the exchangeable structure GEE model is \Sexpr{round(coef(resp_gee2)["trttrt"], 3)} which, using the robust standard errors, has an associated $95\%$ confidence interval <>= se <- summary(resp_gee2)$coefficients["trttrt", "Robust S.E."] coef(resp_gee2)["trttrt"] + c(-1, 1) * se * qnorm(0.975) @ These values reflect effects on the log-odds scale. Interpretation becomes simpler if we exponentiate the values to get the effects in terms of odds. This gives a treatment effect of \Sexpr{round(exp(coef(resp_gee2)["trttrt"]), 3)} and a $95\%$ confidence interval of <>= exp(coef(resp_gee2)["trttrt"] + c(-1, 1) * se * qnorm(0.975)) @ The odds of achieving a `good' respiratory status with the active treatment is between %' about twice and seven times the corresponding odds for the placebo. \subsection{Epilepsy} Moving on to the count data in \Robject{epilepsy} from Table~\ref{ALDII-epilepsy-tab}, we begin by calculating the means and variances of the number of seizures for all interactions between treatment and period: <>= data("epilepsy", package = "HSAUR3") itp <- interaction(epilepsy$treatment, epilepsy$period) tapply(epilepsy$seizure.rate, itp, mean) tapply(epilepsy$seizure.rate, itp, var) @ Some of the variances are considerably larger than the corresponding means, which for a Poisson variable may suggest that overdispersion may be a problem, see \Sexpr{ch("GLM")}. \begin{figure} \begin{center} <>= layout(matrix(1:2, nrow = 1)) ylim <- range(epilepsy$seizure.rate) placebo <- subset(epilepsy, treatment == "placebo") progabide <- subset(epilepsy, treatment == "Progabide") boxplot(seizure.rate ~ period, data = placebo, ylab = "Number of seizures", xlab = "Period", ylim = ylim, main = "Placebo") boxplot(seizure.rate ~ period, data = progabide, main = "Progabide", ylab = "Number of seizures", xlab = "Period", ylim = ylim) @ \caption{Boxplots of numbers of seizures in each two-week period post randomization for placebo and active treatments. \label{ALDII-plot1}} \end{center} \end{figure} \begin{figure} \begin{center} <>= layout(matrix(1:2, nrow = 1)) ylim <- range(log(epilepsy$seizure.rate + 1)) boxplot(log(seizure.rate + 1) ~ period, data = placebo, main = "Placebo", ylab = "Log number of seizures", xlab = "Period", ylim = ylim) boxplot(log(seizure.rate + 1) ~ period, data = progabide, main = "Progabide", ylab = "Log number of seizures", xlab = "Period", ylim = ylim) @ \caption{Boxplots of log of numbers of seizures in each two-week period post randomization for placebo and active treatments. \label{ALDII-plot2}} \end{center} \end{figure} We can now fit a Poisson regression model to the data assuming independence using the \Rcmd{glm} function. We also use the GEE approach to fit an independence structure, followed by an exchangeable structure using the following \R{} code: <>= per <- rep(log(2),nrow(epilepsy)) epilepsy$period <- as.numeric(epilepsy$period) names(epilepsy)[names(epilepsy) == "treatment"] <- "trt" fm <- seizure.rate ~ base + age + trt + offset(per) epilepsy_glm <- glm(fm, data = epilepsy, family = "poisson") epilepsy_gee1 <- gee(fm, data = epilepsy, family = "poisson", id = subject, corstr = "independence", scale.fix = TRUE, scale.value = 1) epilepsy_gee2 <- gee(fm, data = epilepsy, family = "poisson", id = subject, corstr = "exchangeable", scale.fix = TRUE, scale.value = 1) epilepsy_gee3 <- gee(fm, data = epilepsy, family = "poisson", id = subject, corstr = "exchangeable", scale.fix = FALSE, scale.value = 1) @ As usual we inspect the fitted models using the \Rcmd{summary} method, the results are given in Figures~\ref{ALDII-epilepsy-glm-summary}, \ref{ALDII-epilepsy-gee1-summary}, \ref{ALDII-epilepsy-gee2-summary}, and \ref{ALDII-epilepsy-gee3-summary}. \renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method for the \Robject{epilepsy\_glm} model. \label{ALDII-epilepsy-glm-summary}} \SchunkLabel <>= summary(epilepsy_glm) @ \SchunkRaw \renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method for the \Robject{epilepsy\_gee1} model (slightly abbreviated). \label{ALDII-epilepsy-gee1-summary}} \SchunkLabel <>= summary(epilepsy_gee1) @ \SchunkRaw \renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method for the \Robject{epilepsy\_gee2} model (slightly abbreviated). \label{ALDII-epilepsy-gee2-summary}} \SchunkLabel <>= summary(epilepsy_gee2) @ \SchunkRaw \renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method for the \Robject{epilepsy\_gee3} model (slightly abbreviated). \label{ALDII-epilepsy-gee3-summary}} \SchunkLabel <>= summary(epilepsy_gee3) @ \SchunkRaw \section{Analysis Using \R{}: Random Effects} As an example of using generalized mixed models for the analysis of longitudinal data with a non-normal response, the following logistic model will be fitted to the respiratory illness data \begin{eqnarray*} \text{logit}(\P(\text{status} = \text{good})) & = & \beta_0 + \beta_1 \text{treatment} + \beta_2 \text{time} + \beta_3 \text{gender} \\% & & + \beta_4 \text{age} + \beta_5 \text{centre} + \beta_6 \text{baseline} + u \end{eqnarray*} where $u$ is a subject-specific random effect. The necessary \R{} code for fitting the model using the \Rcmd{glmer} function from package \Rpackage{lme4} \citep{PKG:lme4,HSAUR:Bates2005} is: <>= library("lme4") resp_lmer <- glmer(status ~ baseline + month + trt + gender + age + centre + (1 | subject), family = binomial(), data = resp) exp(fixef(resp_lmer)) @ The significance of the effects as estimated by this random effects model and by the GEE model described in Section~\ref{ALDII:resp} is generally similar. But as expected from our previous discussion the estimated coefficients are substantially larger. While the estimated effect of treatment on a randomly sampled individual, given the set of observed covariates, is estimated by the marginal model using GEE to increase the log-odds of being disease free by $\Sexpr{round(coef(resp_gee2)["trttrt"], 3)}$, the corresponding estimate from the random effects model is $\Sexpr{round(fixef(resp_lmer)["trttrt"], 3)}$. These are not inconsistent results but reflect the fact that the models are estimating different parameters. The random effects estimate is conditional upon the patient's random effect, a quantity that is rarely known in practice. Were we to examine the log-odds of the average predicted probabilities with and without treatment (averaged over the random effects) this would give an estimate comparable to that estimated within the marginal model. <>= su <- summary(resp_lmer) if (!interactive()) { summary <- function(x) { cat("\n...\n") cat("Fixed effects:\n") lme4V <- packageDescription("lme4")$Version if (compareVersion("0.999999-2", lme4V) >= 0) { printCoefmat(su@coefs) } else { printCoefmat(su$coefficients) } cat("\n...\n") } } @ \renewcommand{\nextcaption}{\R{} output of the \Rcmd{summary} method for the \Robject{resp\_lmer} model (abbreviated). \label{ALDII-resp-lmer-summary}} \SchunkLabel <>= summary(resp_lmer) @ \SchunkRaw \clearpage \bibliographystyle{LaTeXBibTeX/refstyle} \bibliography{LaTeXBibTeX/HSAUR} \end{document}