## ----preliminaries, include = FALSE------------------------------------------- library("betareg") knitr::opts_chunk$set( engine = "R", collapse = TRUE, comment = "##", message = FALSE, warning = FALSE, echo = TRUE ) options(width = 70, prompt = "R> ", continue = "+ ", useFancyQuotes = FALSE, digits = 5) combine <- function(x, sep, width) { cs <- cumsum(nchar(x)) remaining <- if (any(cs[-1] > width)) combine(x[c(FALSE, cs[-1] > width)], sep, width) c(paste(x[c(TRUE, cs[-1] <= width)], collapse= sep), remaining) } prettyPrint <- function(x, sep = " ", linebreak = "\n\t", width = getOption("width")) { x <- strsplit(x, sep)[[1]] paste(combine(x, sep, width), collapse = paste(sep, linebreak, collapse = "")) } cache <- FALSE enumerate <- function(x) paste(paste(x[-length(x)], collapse = ", "), x[length(x)], sep = " and ") betamix_methods <- enumerate(paste("`", gsub("\\.betamix", "", as.character(methods(class = "betamix"))), "`", sep = "")) ## ----eval=FALSE--------------------------------------------------------------- ## betareg(formula, data, subset, na.action, weights, offset, ## link = "logit", link.phi = NULL, type = c("ML", "BC", "BR"), ## control = betareg.control(...), model = TRUE, y = TRUE, x = FALSE, ...) ## ----eval=FALSE--------------------------------------------------------------- ## betatree(formula, partition, data, subset, na.action, weights, offset, ## link = "logit", link.phi = "log", control = betareg.control(), ...) ## ----eval=FALSE--------------------------------------------------------------- ## betamix(formula, data, k, fixed, subset, na.action, ## link = "logit", link.phi = "log", control = betareg.control(...), ## FLXconcomitant = NULL, extra_components, ## verbose = FALSE, ID, nstart = 3, FLXcontrol = list(), cluster = NULL, ## which = "BIC", ...) ## ----include=FALSE------------------------------------------------------------ data("ReadingSkills", package = "betareg") mean_accuracy <- format(round(with(ReadingSkills, tapply(accuracy, dyslexia, mean)), digits = 3), nsmall = 3) mean_iq <- format(round(with(ReadingSkills, tapply(iq, dyslexia, mean)), digits = 3), nsmall = 3) ## ----ReadingSkills------------------------------------------------------------ #| echo: false #| fig-width: 6 #| fig-height: 5.5 #| out-width: 100% #| label: fig-ReadingSkills #| fig-cap: "Reading skills data from @betareg:Smithson+Verkuilen:2006. Linearly transformed reading accuracy by IQ score and dyslexia status (control, blue vs. dyslexic, red). Fitted curves correspond to beta regression (solid) and OLS regression with logit-transformed dependent variable (dashed)." data("ReadingSkills", package = "betareg") rs_ols <- lm(qlogis(accuracy) ~ dyslexia * iq, data = ReadingSkills) rs_beta <- betareg(accuracy ~ dyslexia * iq | dyslexia + iq, data = ReadingSkills, hessian = TRUE) cl1 <- hcl(c(260, 0), 90, 40) cl2 <- hcl(c(260, 0), 10, 95) plot(accuracy ~ iq, data = ReadingSkills, col = cl2[as.numeric(dyslexia)], main = "Reading skills data", xlab = "IQ score", ylab = "Reading accuracy", pch = c(19, 17)[as.numeric(dyslexia)], cex = 1.5) points(accuracy ~ iq, data = ReadingSkills, cex = 1.5, pch = (1:2)[as.numeric(dyslexia)], col = cl1[as.numeric(dyslexia)]) nd <- data.frame(dyslexia = "no", iq = -30:30/10) lines(nd$iq, predict(rs_beta, nd), col = cl1[1], lwd = 2) lines(nd$iq, plogis(predict(rs_ols, nd)), col = cl1[1], lty = 2, lwd = 2) nd <- data.frame(dyslexia = "yes", iq = -30:30/10) lines(nd$iq, predict(rs_beta, nd), col = cl1[2], lwd = 2) lines(nd$iq, plogis(predict(rs_ols, nd)), col = cl1[2], lty = 2, lwd = 2) legend("topleft", c("control", "dyslexic", "betareg", "lm"), lty = c(NA, NA, 1:2), pch = c(19, 17, NA, NA), lwd = 2, col = c(cl2, 1, 1), bty = "n") legend("topleft", c("control", "dyslexic", "betareg", "lm"), lty = c(NA, NA, 1:2), pch = c(1, 2, NA, NA), col = c(cl1, NA, NA), bty = "n") ## ----ReadingSkills-bias------------------------------------------------------- data("ReadingSkills", package = "betareg") rs_f <- accuracy ~ dyslexia * iq | dyslexia * iq rs_ml <- betareg(rs_f, data = ReadingSkills, type = "ML") rs_bc <- betareg(rs_f, data = ReadingSkills, type = "BC") rs_br <- betareg(rs_f, data = ReadingSkills, type = "BR") ## ----ReadingSkills-bias-table, echo=FALSE, results='asis'--------------------- rs_list <- list(rs_ml, rs_bc, rs_br) cf <- paste("$", sapply(round(sapply(rs_list, coef), digits = 3), format, nsmall = 3), "$", sep = "") se <- paste("$", format(round(sapply(rs_list, function(x) sqrt(diag(vcov(x)))), digits = 3), nsmall = 3), "$", sep = "") ll <- paste("$", format(round(sapply(rs_list, logLik), digits = 3), nsmall = 3), "$", sep = "") cfse <- matrix(as.vector(rbind(cf, se)), ncol = 3) cfse <- cbind( c("Mean", rep("", 7), "Precision", rep("", 7)), rep(as.vector(rbind(c("(Intercept)", "`dyslexia`", "`iq`", "`dyslexia:iq`"), "")), 2), cfse) cfse <- rbind(cfse, c("Log-likelihood", "", ll)) knitr::kable(cfse, align = c("l", "l", "r", "r", "r"), col.names = c("", "", "Maximum likelihood", "Bias correction", "Bias reduction")) ## ----ReadingSkills-phi-plot--------------------------------------------------- #| echo: false #| fig-height: 6.5 #| fig-width: 6.5 #| out-width: 100% #| label: fig-readingskillsbias #| fig-cap: "Scatterplots of the logarithm of the estimated precision parameters $\\log(\\phi_i)$ based on the maximum likelihood, bias-corrected and bias-reduced estimates. The dashed black line is the main diagonal, the solid red line is a scatterplot smoother." pr_phi <- sapply(list("Maximum likelihood" = rs_ml, "Bias correction" = rs_bc, "Bias reduction" = rs_br), predict, type = "precision") pairs(log(pr_phi), panel = function(x, y, ...) { panel.smooth(x, y, ...) abline(0, 1, lty = 2) }) ## ----ReadingSkills-noise, echo=TRUE------------------------------------------- suppressWarnings(RNGversion("3.5.0")) set.seed(1071) n <- nrow(ReadingSkills) ReadingSkills$x1 <- rnorm(n) ReadingSkills$x2 <- runif(n) ReadingSkills$x3 <- factor(sample(0:1, n, replace = TRUE)) ## ----ReadingSkills-tree------------------------------------------------------- rs_tree <- betatree(accuracy ~ iq | iq, ~ dyslexia + x1 + x2 + x3, data = ReadingSkills, minsize = 10) ## ----ReadingSkills-tree2, echo=TRUE, eval=FALSE------------------------------- ## rs_tree <- betatree(accuracy ~ iq | iq | dyslexia + x1 + x2 + x3, ## data = ReadingSkills, minsize = 10) ## ----ReadingSkills-tree3, eval=FALSE------------------------------------------ ## plot(rs_tree) ## ----ReadingSkills-tree-plot-------------------------------------------------- #| echo: false #| fig-height: 7 #| fig-width: 10 #| out-width: 100% #| label: fig-betatree #| fig-cap: "Partitioned beta regression model for the `ReadingSkills` data." plot(rs_tree) ## ----ReadingSkills-tree-coef-------------------------------------------------- coef(rs_tree) ## ----ReadingSkills-tree4, echo=TRUE------------------------------------------- rs_tree ## ----ReadingSkills-tree-sctest------------------------------------------------ library("strucchange") sctest(rs_tree) ## ----ReadingSkills-mix-------------------------------------------------------- rs_mix <- betamix(accuracy ~ iq, data = ReadingSkills, k = 3, extra_components = extraComponent(type = "uniform", coef = 0.99, delta = 0.01), nstart = 10) ## ----ReadingSkills-mix3------------------------------------------------------- rs_mix ## ----ReadingSkills-mix4------------------------------------------------------- summary(rs_mix) ## ----ReadingSkills-betamix-plot3---------------------------------------------- #| echo: false #| fig-height: 5.5 #| fig-width: 10 #| out-width: 100% #| label: fig-betamix #| fig-cap: "Fitted regression lines for the mixture model with three components and the observations shaded according to their posterior probabilities (left). Fitted regression lines for the partitioned beta regression model with shading according to the observed `dyslexic` variable where nondyslexic and dyslexic children are in blue and red, respectively (right)." par(mfrow = c(1, 2)) ix <- as.numeric(ReadingSkills$dyslexia) prob <- 2 * (posterior(rs_mix)[cbind(seq_along(ix), clusters(rs_mix))] - 0.5) col3 <- hcl(c(0, 260, 130), 65, 45, fixup = FALSE) col1 <- col3[clusters(rs_mix)] col2 <- hcl(c(0, 260, 130)[clusters(rs_mix)], 65 * abs(prob)^1.5, 95 - 50 * abs(prob)^1.5, fixup = FALSE) plot(accuracy ~ iq, data = ReadingSkills, col = col2, pch = 19, cex = 1.5, xlim = c(-2, 2), main = "Mixture model (dyslexia unobserved)") points(accuracy ~ iq, data = ReadingSkills, cex = 1.5, pch = 1, col = col1) iq <- -30:30/10 cf <- rbind(coef(rs_mix, model = "mean", component = 1:2), c(qlogis(0.99), 0)) for(i in 1:3) lines(iq, plogis(cf[i, 1] + cf[i, 2] * iq), lwd = 2, col = col3[i]) ix <- as.numeric(ReadingSkills$dyslexia) col1 <- hcl(c(260, 0), 90, 40)[ix] col2 <- hcl(c(260, 0), 10, 95)[ix] plot(accuracy ~ iq, data = ReadingSkills, col = col2, pch = 19, cex = 1.5, xlim = c(-2, 2), main = "Partitioned model (dyslexia observed)") points(accuracy ~ iq, data = ReadingSkills, cex = 1.5, pch = 1, col = col1) cf <- coef(rs_tree, model = "mean") col3 <- hcl(c(260, 0), 90, 40) for(i in 1:2) lines(iq, plogis(cf[i, 1] + cf[i, 2] * iq), lwd = 2, col = col3[i]) ## ----ReadingSkills-mix5------------------------------------------------------- table(clusters(rs_mix), ReadingSkills$dyslexia) ## ----GasolineYield-bias------------------------------------------------------- data("GasolineYield", package = "betareg") gy <- lapply(c("ML", "BC", "BR"), function(x) betareg(yield ~ batch + temp, data = GasolineYield, type = x)) ## ----GasolineYield-bias-table, echo=FALSE, results='asis'--------------------- cf <- matrix(paste("$", sapply(round(sapply(gy, coef), digits = 5), format, nsmall = 5), "$", sep = ""), ncol = 3) se <- matrix(gsub(" ", "", paste("$", format(round(sapply(gy, function(x) sqrt(diag(vcov(x)))), digits = 5), nsmall = 5), "$", sep = ""), fixed = TRUE), ncol = 3) cfse <- cbind(c(paste("$\\beta_{", 1:11, "}$", sep = ""), "$\\phi$"), cf[,1], se[,1], cf[,2], se[,2], cf[,3], se[,3]) knitr::kable(cfse, align = c("l", rep("r", 6)), col.names = c("", "Maximum likelihood", "", "Bias correction", "", "Bias reduction", "")) ## ----GasolineYield-phi-------------------------------------------------------- sapply(gy, coef, model = "precision") ## ----GasolineYield-phi-loglik------------------------------------------------- sapply(gy, logLik) ## ----GasolineYield-bias2------------------------------------------------------ data("GasolineYield", package = "betareg") gy2 <- lapply(c("ML", "BC", "BR"), function(x) betareg(yield ~ batch + temp | 1, data = GasolineYield, type = x)) sapply(gy2, logLik) ## ----GasolineYield-bias-table2, echo=FALSE, results='asis'-------------------- cf <- matrix(paste("$", sapply(round(sapply(gy2, coef), digits = 5), format, nsmall = 5), "$", sep = ""), ncol = 3) se <- matrix(gsub(" ", "", paste("$", format(round(sapply(gy2, function(x) sqrt(diag(vcov(x)))), digits = 5), nsmall = 5), "$", sep = ""), fixed = TRUE), ncol = 3) cfse <- cbind(c(paste("$\\beta_{", 1:11, "}$", sep = ""), "$\\log\\phi$"), cf[,1], se[,1], cf[,2], se[,2], cf[,3], se[,3]) knitr::kable(cfse, align = c("l", rep("r", 6)), col.names = c("", "Maximum likelihood", "", "Bias correction", "", "Bias reduction", ""))