params <- list(EVAL = TRUE) ## ---- echo = FALSE------------------------------------------------------------ knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE, warning = FALSE, eval = if (isTRUE(exists("params"))) params$EVAL else FALSE ) ## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set( echo = TRUE, dpi = 100, fig.asp = 0.8, fig.width = 6, out.width = "60%", fig.align = "center") library(mvgam) library(ggplot2) library(dplyr) # A custom ggplot2 theme theme_set(theme_classic(base_size = 12, base_family = 'serif') + theme(axis.line.x.bottom = element_line(colour = "black", size = 1), axis.line.y.left = element_line(colour = "black", size = 1))) options(ggplot2.discrete.colour = c("#A25050", "#00008b", 'darkred', "#010048"), ggplot2.discrete.fill = c("#A25050", "#00008b", 'darkred', "#010048")) ## ----------------------------------------------------------------------------- set.seed(999) # Simulate observations for species 1, which shows a declining trend and 0.7 detection probability data.frame(site = 1, # five replicates per year; six years replicate = rep(1:5, 6), time = sort(rep(1:6, 5)), species = 'sp_1', # true abundance declines nonlinearly truth = c(rep(28, 5), rep(26, 5), rep(23, 5), rep(16, 5), rep(14, 5), rep(14, 5)), # observations are taken with detection prob = 0.7 obs = c(rbinom(5, 28, 0.7), rbinom(5, 26, 0.7), rbinom(5, 23, 0.7), rbinom(5, 15, 0.7), rbinom(5, 14, 0.7), rbinom(5, 14, 0.7))) %>% # add 'series' information, which is an identifier of site, replicate and species dplyr::mutate(series = paste0('site_', site, '_', species, '_rep_', replicate), time = as.numeric(time), # add a 'cap' variable that defines the maximum latent N to # marginalize over when estimating latent abundance; in other words # how large do we realistically think the true abundance could be? cap = 100) %>% dplyr::select(- replicate) -> testdat # Now add another species that has a different temporal trend and a smaller # detection probability (0.45 for this species) testdat = testdat %>% dplyr::bind_rows(data.frame(site = 1, replicate = rep(1:5, 6), time = sort(rep(1:6, 5)), species = 'sp_2', truth = c(rep(4, 5), rep(7, 5), rep(15, 5), rep(16, 5), rep(19, 5), rep(18, 5)), obs = c(rbinom(5, 4, 0.45), rbinom(5, 7, 0.45), rbinom(5, 15, 0.45), rbinom(5, 16, 0.45), rbinom(5, 19, 0.45), rbinom(5, 18, 0.45))) %>% dplyr::mutate(series = paste0('site_', site, '_', species, '_rep_', replicate), time = as.numeric(time), cap = 50) %>% dplyr::select(-replicate)) ## ----------------------------------------------------------------------------- testdat$species <- factor(testdat$species, levels = unique(testdat$species)) testdat$series <- factor(testdat$series, levels = unique(testdat$series)) ## ----------------------------------------------------------------------------- dplyr::glimpse(testdat) head(testdat, 12) ## ----------------------------------------------------------------------------- testdat %>% # each unique combination of site*species is a separate process dplyr::mutate(trend = as.numeric(factor(paste0(site, species)))) %>% dplyr::select(trend, series) %>% dplyr::distinct() -> trend_map trend_map ## ----include = FALSE, results='hide'------------------------------------------ mod <- mvgam( # the observation formula sets up linear predictors for # detection probability on the logit scale formula = obs ~ species - 1, # the trend_formula sets up the linear predictors for # the latent abundance processes on the log scale trend_formula = ~ s(time, by = trend, k = 4) + species, # the trend_map takes care of the mapping trend_map = trend_map, # nmix() family and data family = nmix(), data = testdat, # priors can be set in the usual way priors = c(prior(std_normal(), class = b), prior(normal(1, 1.5), class = Intercept_trend)), samples = 1000) ## ----eval = FALSE------------------------------------------------------------- ## mod <- mvgam( ## # the observation formula sets up linear predictors for ## # detection probability on the logit scale ## formula = obs ~ species - 1, ## ## # the trend_formula sets up the linear predictors for ## # the latent abundance processes on the log scale ## trend_formula = ~ s(time, by = trend, k = 4) + species, ## ## # the trend_map takes care of the mapping ## trend_map = trend_map, ## ## # nmix() family and data ## family = nmix(), ## data = testdat, ## ## # priors can be set in the usual way ## priors = c(prior(std_normal(), class = b), ## prior(normal(1, 1.5), class = Intercept_trend)), ## samples = 1000) ## ----------------------------------------------------------------------------- code(mod) ## ----------------------------------------------------------------------------- summary(mod) ## ----------------------------------------------------------------------------- loo(mod) ## ----------------------------------------------------------------------------- plot(mod, type = 'smooths', trend_effects = TRUE) ## ----------------------------------------------------------------------------- marginaleffects::plot_predictions(mod, condition = 'species', type = 'detection') + ylab('Pr(detection)') + ylim(c(0, 1)) + theme_classic() + theme(legend.position = 'none') ## ----------------------------------------------------------------------------- hc <- hindcast(mod, type = 'latent_N') # Function to plot latent abundance estimates vs truth plot_latentN = function(hindcasts, data, species = 'sp_1'){ all_series <- unique(data %>% dplyr::filter(species == !!species) %>% dplyr::pull(series)) # Grab the first replicate that represents this series # so we can get the true simulated values series <- as.numeric(all_series[1]) truths <- data %>% dplyr::arrange(time, series) %>% dplyr::filter(series == !!levels(data$series)[series]) %>% dplyr::pull(truth) # In case some replicates have missing observations, # pull out predictions for ALL replicates and average over them hcs <- do.call(rbind, lapply(all_series, function(x){ ind <- which(names(hindcasts$hindcasts) %in% as.character(x)) hindcasts$hindcasts[[ind]] })) # Calculate posterior empirical quantiles of predictions pred_quantiles <- data.frame(t(apply(hcs, 2, function(x) quantile(x, probs = c(0.05, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.95))))) pred_quantiles$time <- 1:NROW(pred_quantiles) pred_quantiles$truth <- truths # Grab observations data %>% dplyr::filter(series %in% all_series) %>% dplyr::select(time, obs) -> observations # Plot ggplot(pred_quantiles, aes(x = time, group = 1)) + geom_ribbon(aes(ymin = X5., ymax = X95.), fill = "#DCBCBC") + geom_ribbon(aes(ymin = X30., ymax = X70.), fill = "#B97C7C") + geom_line(aes(x = time, y = truth), colour = 'black', linewidth = 1) + geom_point(aes(x = time, y = truth), shape = 21, colour = 'white', fill = 'black', size = 2.5) + geom_jitter(data = observations, aes(x = time, y = obs), width = 0.06, shape = 21, fill = 'darkred', colour = 'white', size = 2.5) + labs(y = 'Latent abundance (N)', x = 'Time', title = species) } ## ----------------------------------------------------------------------------- plot_latentN(hc, testdat, species = 'sp_1') plot_latentN(hc, testdat, species = 'sp_2') ## ----------------------------------------------------------------------------- # Date link load(url('https://github.com/doserjef/spAbundance/raw/main/data/dataNMixSim.rda')) data.one.sp <- dataNMixSim # Pull out observations for one species data.one.sp$y <- data.one.sp$y[1, , ] # Abundance covariates that don't change across repeat sampling observations abund.cov <- dataNMixSim$abund.covs[, 1] abund.factor <- as.factor(dataNMixSim$abund.covs[, 2]) # Detection covariates that can change across repeat sampling observations # Note that `NA`s are not allowed for covariates in mvgam, so we randomly # impute them here det.cov <- dataNMixSim$det.covs$det.cov.1[,] det.cov[is.na(det.cov)] <- rnorm(length(which(is.na(det.cov)))) det.cov2 <- dataNMixSim$det.covs$det.cov.2 det.cov2[is.na(det.cov2)] <- rnorm(length(which(is.na(det.cov2)))) ## ----------------------------------------------------------------------------- mod_data <- do.call(rbind, lapply(1:NROW(data.one.sp$y), function(x){ data.frame(y = data.one.sp$y[x,], abund_cov = abund.cov[x], abund_fac = abund.factor[x], det_cov = det.cov[x,], det_cov2 = det.cov2[x,], replicate = 1:NCOL(data.one.sp$y), site = paste0('site', x)) })) %>% dplyr::mutate(species = 'sp_1', series = as.factor(paste0(site, '_', species, '_', replicate))) %>% dplyr::mutate(site = factor(site, levels = unique(site)), species = factor(species, levels = unique(species)), time = 1, cap = max(data.one.sp$y, na.rm = TRUE) + 20) ## ----------------------------------------------------------------------------- NROW(mod_data) dplyr::glimpse(mod_data) head(mod_data) ## ----------------------------------------------------------------------------- mod_data %>% # each unique combination of site*species is a separate process dplyr::mutate(trend = as.numeric(factor(paste0(site, species)))) %>% dplyr::select(trend, series) %>% dplyr::distinct() -> trend_map trend_map %>% dplyr::arrange(trend) %>% head(12) ## ----include = FALSE, results='hide'------------------------------------------ mod <- mvgam( # effects of covariates on detection probability; # here we use penalized splines for both continuous covariates formula = y ~ s(det_cov, k = 3) + s(det_cov2, k = 3), # effects of the covariates on latent abundance; # here we use a penalized spline for the continuous covariate and # hierarchical intercepts for the factor covariate trend_formula = ~ s(abund_cov, k = 3) + s(abund_fac, bs = 're'), # link multiple observations to each site trend_map = trend_map, # nmix() family and supplied data family = nmix(), data = mod_data, # standard normal priors on key regression parameters priors = c(prior(std_normal(), class = 'b'), prior(std_normal(), class = 'Intercept'), prior(std_normal(), class = 'Intercept_trend'), prior(std_normal(), class = 'sigma_raw_trend')), # use Stan's variational inference for quicker results algorithm = 'meanfield', # no need to compute "series-level" residuals residuals = FALSE, samples = 1000) ## ----eval=FALSE--------------------------------------------------------------- ## mod <- mvgam( ## # effects of covariates on detection probability; ## # here we use penalized splines for both continuous covariates ## formula = y ~ s(det_cov, k = 4) + s(det_cov2, k = 4), ## ## # effects of the covariates on latent abundance; ## # here we use a penalized spline for the continuous covariate and ## # hierarchical intercepts for the factor covariate ## trend_formula = ~ s(abund_cov, k = 4) + ## s(abund_fac, bs = 're'), ## ## # link multiple observations to each site ## trend_map = trend_map, ## ## # nmix() family and supplied data ## family = nmix(), ## data = mod_data, ## ## # standard normal priors on key regression parameters ## priors = c(prior(std_normal(), class = 'b'), ## prior(std_normal(), class = 'Intercept'), ## prior(std_normal(), class = 'Intercept_trend'), ## prior(std_normal(), class = 'sigma_raw_trend')), ## ## # use Stan's variational inference for quicker results ## algorithm = 'meanfield', ## ## # no need to compute "series-level" residuals ## residuals = FALSE, ## samples = 1000) ## ----------------------------------------------------------------------------- summary(mod, include_betas = FALSE) ## ----------------------------------------------------------------------------- marginaleffects::avg_predictions(mod, type = 'detection') ## ----------------------------------------------------------------------------- abund_plots <- plot(conditional_effects(mod, type = 'link', effects = c('abund_cov', 'abund_fac')), plot = FALSE) ## ----------------------------------------------------------------------------- abund_plots[[1]] + ylab('Expected latent abundance') ## ----------------------------------------------------------------------------- abund_plots[[2]] + ylab('Expected latent abundance') ## ----------------------------------------------------------------------------- det_plots <- plot(conditional_effects(mod, type = 'detection', effects = c('det_cov', 'det_cov2')), plot = FALSE) ## ----------------------------------------------------------------------------- det_plots[[1]] + ylab('Pr(detection)') det_plots[[2]] + ylab('Pr(detection)') ## ----------------------------------------------------------------------------- fivenum_round = function(x)round(fivenum(x, na.rm = TRUE), 2) marginaleffects::plot_predictions(mod, newdata = marginaleffects::datagrid(det_cov = unique, det_cov2 = fivenum_round), by = c('det_cov', 'det_cov2'), type = 'detection') + theme_classic() + ylab('Pr(detection)')