## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set(comment = NA, eval = greta:::check_tf_version("message"), cache = TRUE) library (greta) ## ----linear_greta------------------------------------------------------------- # variables & priors int <- normal(0, 10) coef <- normal(0, 10) sd <- cauchy(0, 3, truncation = c(0, Inf)) # linear predictor mu <- int + coef * attitude$complaints # observation model distribution(attitude$rating) <- normal(mu, sd) ## ----multiple_linear_data----------------------------------------------------- data(attitude) design <- as.matrix(attitude[, 2:7]) ## ----multiple_linear_greta---------------------------------------------------- int <- normal(0, 10) coefs <- normal(0, 10, dim = ncol(design)) sd <- cauchy(0, 3, truncation = c(0, Inf)) # matrix multiplication is more efficient than multiplying the coefficients # separately mu <- int + design %*% coefs distribution(attitude$rating) <- normal(mu, sd) ## ----multiple_linear_warpbreaks_data------------------------------------------ data("warpbreaks") X <- as_data(model.matrix(breaks ~ wool + tension, warpbreaks)) y <- as_data(warpbreaks$breaks) ## ----multiple_linear_warpbreaks_greta----------------------------------------- int <- variable() coefs <- normal(0, 5, dim = ncol(X) - 1) beta <- c(int, coefs) eta <- X %*% beta distribution(y) <- poisson(exp(eta)) ## ----multiple_linear_multilogit_data------------------------------------------ data(iris) X <- as_data(cbind(1, iris[, 1:4])) y <- model.matrix(~ Species - 1, iris) P <- ncol(X) K <- ncol(y) ## ----multiple_linear_multilogit_greta----------------------------------------- beta <- normal(0, 5, dim = c(P, K - 1)) eta <- X %*% beta prob <- imultilogit(eta) distribution(y) <- categorical(prob) ## ----multiple_linear_lasso_data----------------------------------------------- data(attitude) design <- as.matrix(attitude[, 2:7]) ## ----multiple_linear_lasso_greta---------------------------------------------- int <- normal(0, 10) sd <- cauchy(0, 3, truncation = c(0, Inf)) tau <- exponential(0.5, dim = ncol(design)) coefs <- normal(0, tau) mu <- int + design %*% coefs distribution(attitude$rating) <- normal(mu, sd) ## ----hierarchical_linear_greta------------------------------------------------ # linear model parameters int <- normal(0, 10) coef <- normal(0, 10) sd <- cauchy(0, 3, truncation = c(0, Inf)) # hierarchical model for species effect; use the first species as the baseline # like in lm() species_sd <- lognormal(0, 1) species_offset <- normal(0, species_sd, dim = 2) species_effect <- rbind(0, species_offset) species_id <- as.numeric(iris$Species) # model mu <- int + coef * iris$Sepal.Width + species_effect[species_id] distribution(iris$Sepal.Length) <- normal(mu, sd) ## ----hierarchical_linear_slopes_greta----------------------------------------- # linear model parameters int <- normal(0, 10) coef <- normal(0, 10) sd <- cauchy(0, 3, truncation = c(0, Inf)) species_id <- as.numeric(iris$Species) # random intercepts species_int_sd <- lognormal(0, 1) species_int <- normal(0, species_int_sd, dim = 2) species_int_eff <- rbind(0, species_int) # random slopes species_slope_sd <- lognormal(0, 1) species_slope <- normal(0, species_slope_sd, dim = 2) species_slope_eff <- rbind(0, species_slope) # model mu <- int + coef * iris$Sepal.Width + species_int_eff[species_id] + iris$Sepal.Width * species_slope_eff[species_id] distribution(iris$Sepal.Length) <- normal(mu, sd) ## ----hierarchical_linear_slopes_corr_greta------------------------------------ # model matrix modmat <- model.matrix(~ Sepal.Width, iris) # index of species jj <- as.numeric(iris$Species) M <- ncol(modmat) # number of varying coefficients N <- max(jj) # number of species # prior on the standard deviation of the varying coefficient tau <- exponential(0.5, dim = M) # prior on the correlation between the varying coefficient Omega <- lkj_correlation(3, M) # optimization of the varying coefficient sampling through # cholesky factorization and whitening Omega_U <- chol(Omega) Sigma_U <- sweep(Omega_U, 2, tau, "*") z <- normal(0, 1, dim = c(N, M)) ab <- z %*% Sigma_U # equivalent to: ab ~ multi_normal(0, Sigma_U) # the linear predictor mu <- rowSums(ab[jj,] * modmat) # the residual variance sigma_e <- cauchy(0, 3, truncation = c(0, Inf)) #model y <- iris$Sepal.Length distribution(y) <- normal(mu, sigma_e) ## ----linear_uninformative_greta----------------------------------------------- # variables & priors int <- variable() coef <- variable() sd <- cauchy(0, 3, truncation = c(0, Inf)) # linear predictor mu <- int + coef * attitude$complaints # observation model distribution(attitude$rating) <- normal(mu, sd) ## ----linear_ridge_greta------------------------------------------------------- # variables & priors int <- variable() sd <- cauchy(0, 3, truncation = c(0, Inf)) tau <- inverse_gamma(1, 1) coef <- normal(0, tau) # linear predictor mu <- int + coef * attitude$complaints # observation model distribution(attitude$rating) <- normal(mu, sd) ## ----linear_lasso_greta------------------------------------------------------- # variables & priors int <- variable() sd <- inverse_gamma(1, 1) lambda <- gamma(1, 1) tau <- exponential(0.5 * lambda**2) coef <- normal(0, tau) # linear predictor mu <- int + coef * attitude$complaints # observation model distribution(attitude$rating) <- normal(mu, sd) ## ----linear_horseshoe_greta--------------------------------------------------- horseshoe <- function (tau = 1, dim = NULL) { lambda <- cauchy(0, 1, truncation = c(0, Inf), dim = dim) sd <- tau ^ 2 * lambda ^ 2 normal(0, sd, dim = dim) } # variables & priors int <- variable() sd <- inverse_gamma(1, 1) coef <- horseshoe() # linear predictor mu <- int + coef * attitude$complaints # observation model distribution(attitude$rating) <- normal(mu, sd) ## ----linear_finnish_horseshoe_greta------------------------------------------- regularized_horseshoe <- function (tau = 1, c = 1, dim = NULL) { stopifnot(c > 0) lambda <- cauchy(0, 1, truncation = c(0, Inf), dim = dim) lambda_tilde <- (c^2 * lambda^2) / (c^2 + tau^2 * lambda^2) sd <- tau ^ 2 * lambda_tilde ^ 2 normal(0, sd, dim = dim) } # variables & priors int <- variable() sd <- inverse_gamma(1, 1) coef <- regularized_horseshoe() # linear predictor mu <- int + coef * attitude$complaints # observation model distribution(attitude$rating) <- normal(mu, sd) ## ----hierarchical_linear_general_greta---------------------------------------- int <- normal(0, 10) coef <- normal(0, 10) sd <- cauchy(0, 3, truncation = c(0, Inf)) n_species <- length(unique(iris$Species)) species_id <- as.numeric(iris$Species) Z <- model.matrix(~ Species + Sepal.Length * Species - 1, data = iris) gamma_matrix <- multivariate_normal(matrix(0, 1, 2), diag(2), n_realisations = 3) gamma <- c(gamma_matrix) wi <- as_data(iris$Sepal.Width) Z <- as_data(Z) mu <- int + coef * wi + Z %*% gamma distribution(iris$Sepal.Length) <- normal(mu, sd) ## ----hierarchical_linear_marginal_greta--------------------------------------- int <- variable() coef <- normal(0, 5) sd <- cauchy(0, 3, truncation = c(0, Inf)) n_species <- length(unique(iris$Species)) species_id <- as.numeric(iris$Species) Z <- model.matrix(~ Species + Sepal.Length * Species - 1, data = iris) G <- zeros(n_species * 2, n_species * 2) for (s in unique(species_id)) { G[c(s, s + n_species), c(s, s + n_species)] <- diag(2) } mu <- int + coef * iris$Sepal.Width V <- zeros(nrow(iris), nrow(iris)) diag(V) <- sd Z <- as_data(Z) V <- V + Z %*% G %*% t(Z) sep <- t(iris$Sepal.Width) distribution(sep) <- multivariate_normal(t(mu), V) ## ----bayesian_neural_network_data, highlight = FALSE-------------------------- N <- 100 p <- 10 set.seed(23) X <- matrix(rnorm(N * p), N) beta <- rnorm(10) y <- X %*% beta + rnorm(N, sd = 0.1) ## ----bayesian_neural_network_greta-------------------------------------------- neural_network <- function(x) { # this can be arbitrarily complex, e.g. multiple hidden layers x %*% weights } weights <- normal(0, 1, dim = c(p, 1)) sd <- inverse_gamma(1, 1) distribution(y) <- normal(neural_network(X), sd) ## ----factor_analysis_data, highlight = FALSE---------------------------------- generate.data <- function(n = 100, p = 5, q = 2, psi = diag(rgamma(p, 1, 1))) { W <- matrix(rnorm(p * q, 1), p, q) Z <- matrix(rnorm(q * n, 2), q, n) WZ <- W %*% Z X <- matrix(0, n, p) for (i in seq_len(n)) { X[i, ] <- MASS::mvrnorm(1, WZ[, i], psi) } list(X = X, W = W, Z = Z, psi = psi) } n <- 100 p <- 5 q <- 2 data <- generate.data(n = n, p = p, q = q) X <- data$X ## ----factor_analysis---------------------------------------------------------- W <- normal(0, 1, dim = c(p, q)) Z <- normal(0, 1, dim = c(q, n)) psi <- zeros(p, p) diag(psi) <- inverse_gamma(1, 1, dim = p) distribution(X) <- multivariate_normal(t(W %*% Z), psi) ## ----air_data, highlight = FALSE---------------------------------------------- y <- c(21, 20, 15) n <- c(48, 34, 21) Z <- c(10, 30, 50) alpha <- 4.48 beta <- 0.76 sigma2 <- 81.14 sigma <- sqrt(sigma2) tau <- 1 / sigma2 J <- 3 ## ----air_greta---------------------------------------------------------------- theta <- normal(0, 32, dim = 2) mu <- alpha + beta * Z X <- normal(mu, sigma) p <- ilogit(theta[1] + theta[2] * X) distribution(y) <- binomial(n, p) ## ----air_stan, echo = FALSE--------------------------------------------------- cat(readLines('https://raw.githubusercontent.com/stan-dev/example-models/master/bugs_examples/vol2/air/air.stan'), sep = '\n') ## ----beetles_data, highlight = FALSE------------------------------------------ x <- c(1.6907, 1.7242, 1.7552, 1.7842, 1.8113, 1.8369, 1.8610, 1.8839) n <- c(59, 60, 62, 56, 63, 59, 62, 60) r <- c(6, 13, 18, 28, 52, 53, 61, 60) N <- 8 ## ----beetles_greta------------------------------------------------------------ alpha_star <- normal(0, 32) beta <- normal(0, 32) p <- ilogit(alpha_star + beta * (x - mean(x))) distribution(r) <- binomial(n, p) alpha <- alpha_star - beta * mean(x) rhat <- p * n ## ----beetles_stan, echo = FALSE----------------------------------------------- cat(readLines('https://raw.githubusercontent.com/stan-dev/example-models/master/bugs_examples/vol2/beetles/beetles_logit.stan'), sep = '\n') ## ----lightspeed_data, highlight = FALSE--------------------------------------- y <- c(28, 26, 33, 24, 34, -44, 27, 16, 40, -2, 29, 22, 24, 21, 25, 30, 23, 29, 31, 19, 24, 20, 36, 32, 36, 28, 25, 21, 28, 29, 37, 25, 28, 26, 30, 32, 36, 26, 30, 22, 36, 23, 27, 27, 28, 27, 31, 27, 26, 33, 26, 32, 32, 24, 39, 28, 24, 25, 32, 25, 29, 27, 28, 29, 16, 23) n <- length(y) ## ----lightspeed_greta--------------------------------------------------------- beta <- variable() sigma <- variable(lower = 0) distribution(y) <- normal(beta, sigma) ## ----lightspeed_stan, echo = FALSE-------------------------------------------- cat(readLines('https://raw.githubusercontent.com/stan-dev/example-models/master/ARM/Ch.8/lightspeed.stan'), sep = '\n') ## ----schools_data, highlight = FALSE------------------------------------------ y <- c(28, 8, -3, 7, -1, 1, 18, 12) sigma_y <- c(15, 10, 16, 11, 9, 11, 10, 18) N <- length(y) ## ----schools_greta------------------------------------------------------------ sigma_eta <- inverse_gamma(1, 1) eta <- normal(0, sigma_eta, dim=N) mu_theta <- normal(0, 100) xi <- normal(0, 5) theta <- mu_theta + xi * eta distribution(y) <- normal(theta, sigma_y) ## ----schools_stan, echo = FALSE----------------------------------------------- cat(readLines('https://raw.githubusercontent.com/stan-dev/example-models/master/ARM/Ch.19/schools.stan'), sep = '\n') ## ----data_logistic, highlight = FALSE----------------------------------------- # make fake data n_env <- 3 n_sites <- 20 # n_sites x n_env matrix of environmental variables env <- matrix(rnorm(n_sites * n_env), nrow = n_sites) # n_sites observations of species presence or absence occupancy <- rbinom(n_sites, 1, 0.5) ## ----logistic_greta----------------------------------------------------------- alpha <- normal(0, 10) beta <- normal(0, 10, dim = n_env) # logit-linear model linear_predictor <- alpha + env %*% beta p <- ilogit(linear_predictor) # distribution (likelihood) over observed values distribution(occupancy) <- bernoulli(p) ## ----data_poisson, highlight = FALSE------------------------------------------ # make fake data n_env <- 3 n_sites <- 20 # n_sites x n_env matrix of environmental variables env <- matrix(rnorm(n_sites * n_env), nrow = n_sites) # n_sites observations of species abundance occupancy <- rpois(n_sites, 5) ## ----poisson_greta------------------------------------------------------------ alpha <- normal(0, 10) beta <- normal(0, 10, dim = n_env) linear_predictor <- alpha + env %*% beta lambda <- exp(linear_predictor) distribution(occupancy) <- poisson(lambda) ## ----data_logistic_error_term_greta, highlight = FALSE------------------------ # make fake data n_env <- 3 n_sites <- 20 n_obs <- 5 # n_sites x n_env matrix of environmental variables env <- matrix(rnorm(n_sites * n_env), nrow = n_sites) # n_sites observations of species presence or absence over n_obs visits occupancy <- rbinom(n_sites, n_obs, 0.5) ## ----logistic_error_term_greta------------------------------------------------ alpha <- normal(0, 10) beta <- normal(0, 10, dim = n_env) error <- normal(0, 10, dim = n_sites) # logit-linear model with extra variation linear_predictor <- alpha + env %*% beta + error p <- ilogit(linear_predictor) # distribution (likelihood) over observed values distribution(occupancy) <- binomial(n_obs, p) ## ----data_multispecies_bernoulli, highlight = FALSE--------------------------- # make fake data n_species <- 5 n_env <- 3 n_sites <- 20 env <- matrix(rnorm(n_sites * n_env), nrow = n_sites) occupancy <- matrix(rbinom(n_species * n_sites, 1, 0.5), nrow = n_sites) ## ----multispecies_bernoulli_greta--------------------------------------------- alpha <- normal(0, 10, dim = n_species) beta <- normal(0, 10, dim = c(n_env, n_species)) env_effect <- env %*% beta # add intercepts for all species linear_predictor <- sweep(env_effect, 2, alpha, FUN = '+') # ilogit of linear predictor p <- ilogit(linear_predictor) # a single observation means our data are bernoulli distributed distribution(occupancy) <- bernoulli(p) ## ----data_multispecies_partially_pool, highlight = FALSE---------------------- # make fake data n_species <- 5 n_env <- 1 n_sites <- 50 env <- matrix(rnorm(n_sites * n_env), nrow = n_sites) occupancy <- matrix(rbinom(n_sites * n_species, 1, 0.5), nrow = n_sites) ## ----multispecies_partially_pool_greta---------------------------------------- global_alpha <- normal(0, 10, dim = 1) global_alpha_sd <- uniform(0, 10, dim = 1) alpha <- normal(global_alpha, global_alpha_sd, dim = n_species) global_betas <- normal(0, 10, dim = n_env) global_betas_sd <- uniform(0, 10, dim = n_env) beta <- normal(global_betas, global_betas_sd, dim = c(n_env, n_species)) env_effect <- env %*% beta # add intercepts for all species linear_predictor <- sweep(env_effect, 2, alpha, FUN = '+') # ilogit of linear predictor p <- ilogit(linear_predictor) distribution(occupancy) <- bernoulli(p) ## ----data_multilevel, highlight = FALSE--------------------------------------- # make fake data n_species <- 3 n_env <- 1 n_sites <- 5 n_traits <- 1 # n_sites x n_env matrix of environmental variables env <- matrix(rnorm(n_sites * n_env), nrow = n_sites) # n_species * n_traits matix of trait variables traits <- matrix(rnorm(n_species * n_traits), nrow = n_species) # n_sites * n_species matrix of observed occupancy occupancy <- matrix(rbinom(n_sites * n_species, 1, 0.5), nrow = n_sites) ## ----multilevel_greta--------------------------------------------------------- # include a column of 1's for intercept estimation in the sub-model (traits) and base model traits <- cbind(rep(1, n_species), traits) env <- cbind(rep(1, n_sites), env) # redefine n_env and n_traits after adding columns for intercepts n_env <- ncol(env) n_traits <- ncol(traits) # sub-model parameters have normal prior distributions g <- normal(0, 10, dim = c(n_env, n_traits)) # parameters of the base model are a function of the parameters of the sub-model beta <- g %*% t(traits) # use the coefficients to get the model linear predictor linear_predictor <- env %*% beta # use the logit link to get probabilities of occupancy p <- ilogit(linear_predictor) # data are bernoulli distributed distribution(occupancy) <- bernoulli(p) ## ----cjs_data, highlight = FALSE---------------------------------------------- n_obs <- 100 n_time <- 20 y <- matrix(sample(c(0, 1), size = (n_obs * n_time), replace = TRUE), ncol = n_time) ## ----cjs_greta---------------------------------------------------------------- # data summaries first_obs <- apply(y, 1, function(x) min(which(x > 0))) final_obs <- apply(y, 1, function(x) max(which(x > 0))) obs_id <- apply(y, 1, function(x) seq(min(which(x > 0)), max(which(x > 0)), by = 1)[-1]) obs_id <- unlist(obs_id) capture_vec <- apply(y, 1, function(x) x[min(which(x > 0)):max(which(x > 0))][-1]) capture_vec <- unlist(capture_vec) # priors phi <- beta(1, 1, dim = n_time) p <- beta(1, 1, dim = n_time) # derived parameter chi <- ones(n_time) for (i in seq_len(n_time - 1)) { tn <- n_time - i chi[tn] <- (1 - phi[tn]) + phi[tn] * (1 - p[tn + 1]) * chi[tn + 1] } # dummy variables alive_data <- ones(length(obs_id)) # definitely alive not_seen_last <- final_obs != 20 # ignore observations in last timestep final_observation <- ones(sum(not_seen_last)) # final observation # set likelihoods distribution(alive_data) <- bernoulli(phi[obs_id - 1]) distribution(capture_vec) <- bernoulli(p[obs_id]) distribution(final_observation) <- bernoulli(chi[final_obs[not_seen_last]]) ## ----cjs_stan, echo = FALSE--------------------------------------------------- cat(readLines('https://raw.githubusercontent.com/stan-dev/example-models/master/misc/ecology/mark-recapture/cjs-K.stan'), sep = '\n')