params <- list(do_calc = FALSE) ## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", fig.align = "center", fig.width = 7 ) library(kableExtra) library(readr) ## ----libs, message=FALSE------------------------------------------------------ # load required packages library(LocalCop) # for conditional copula modeling library(VineCopula) # for simulating copula data library(dplyr) # for data manipulations library(tidyr) # and library(ggplot2) # plotting ## ----calib, echo = FALSE------------------------------------------------------ tab <- read_csv("copula_table.csv", show_col_types = FALSE) tab_cap <- "Copula families implemented in **LocalCop**." kableExtra::kbl(tab, booktabs = TRUE, caption = tab_cap) ## ----dgm, message=FALSE, fig.height=3, fig.cap="Conditional Kendall $\\tau$ for Gumbel copula under DGM."---- # simulate copula data given a covariate set.seed(2024) family <- 4 # Gumbel Copula n <- 1000 # number of observations x <- sort(runif(n)) # covariate values eta_fun <- function(x) sin(6*pi*x) # calibration function # simulate data eta_true <- eta_fun(x) par_true <- BiCopEta2Par(family = family, eta = eta_true) # copula parameter udata <- VineCopula::BiCopSim(n, family = family, par = par_true$par) # plot tau(x) tibble( x = seq(0, 1, len = 100), ) %>% mutate( tau = BiCopEta2Tau(family, eta = eta_fun(x)) ) %>% ggplot(aes(x = x, y = tau)) + geom_line() + ylim(c(0, 1)) + xlab(expression(x)) + ylab(expression(tau(x))) + theme_bw() ## ----local1, message=FALSE---------------------------------------------------- x0 <- 0.1 band <- 0.1 degree <- 1 kernel <- KernEpa # Epanichov kernel (default value) fit1 <- CondiCopLocFit( u1 = udata[,1], u2 = udata[,2], x = x, x0 = x0, family = family, degree = degree, kernel = kernel, band = band ) fit1 ## ----localseq, message=FALSE, fig.height=3.5, warning=FALSE, fig.cap="Conditional Kendall $\\tau$ estimates under the Gumbel copula using bandwidth $h=0.1$."---- x0 <- seq(0, 1, by=0.02) fitseq <- CondiCopLocFit( u1 = udata[,1], u2 = udata[,2], x = x, x0 = x0, family = family, degree = degree, kernel = kernel, band = band ) # plot true vs fitted tau legend_names <- c(expression(tau(x)), expression(hat(tau)(x))) tibble( x = x0, True = BiCopEta2Tau(family, eta = eta_fun(x0)), Fitted = BiCopEta2Tau(fitseq$eta, family= family) ) %>% pivot_longer(True:Fitted, names_to = "Type", values_to = "y") %>% mutate( Type = factor(Type, levels = c("True", "Fitted")) ) %>% ggplot(aes(x = x, y = y, group = Type)) + geom_line(aes(color = Type, linetype = Type)) + geom_point(aes(shape = Type, color = Type)) + scale_color_manual(values = c("black", "red"), labels = legend_names) + scale_shape_manual(values = c(NA, 16), labels = legend_names) + scale_linetype_manual(values = c("solid", NA), labels = legend_names) + ylim(c(0, 1)) + xlab(expression(x)) + ylab(expression("Kendall "*tau)) + theme_bw() + theme( legend.position = "bottom", legend.title = element_blank() ) ## ----select1-precalc---------------------------------------------------------- bandset <- c(0.1, 0.2, 0.4, 0.8, 1) # bandwidth set famset <- c(1, 2, 3, 4, 5) # family set n_loo <- 100 # number of leave-one-out observations in CV likelihood calculation ## ----select1-calc, eval = params$do_calc, message=FALSE----------------------- # system.time({ # cvselect1 <- CondiCopSelect( # u1 = udata[,1], u2 = udata[,2], x = x, # family = famset, # degree = degree, # kernel = kernel, # band = bandset, # xind = n_loo # ) # }) ## ----select1-save, eval = params$do_calc, echo = FALSE------------------------ # saveRDS(cvselect1, file = "cvselect1.rds") ## ----select1-load, eval = !params$do_calc, echo = FALSE----------------------- cvselect1 <- readRDS("cvselect1.rds") ## ----select1------------------------------------------------------------------ cv_res1 <- cvselect1$cv cv_res1 ## ----select2-calc, eval = params$do_calc, message=FALSE----------------------- # system.time({ # cvselect2 <- CondiCopSelect( # u1 = udata[,1], u2 = udata[,2], x = x, # family = famset, # degree = degree, # kernel = kernel, # band = bandset, # xind = nrow(udata) # ) # }) ## ----select2-save, eval = params$do_calc, echo = FALSE------------------------ # saveRDS(cvselect2, file = "cvselect2.rds") ## ----select2-load, eval = !params$do_calc, echo = FALSE----------------------- cvselect2 <- readRDS("cvselect2.rds") ## ----select2------------------------------------------------------------------ cv_res2 <- cvselect2$cv cv_res2 ## ----cvplot, message=FALSE, fig.height=3.5, fig.cap="Cross-validated likelihood for copula and bandwidth selection based on a subset and the full sample."---- fam_names <- c("Gaussian", "Student", "Clayton", "Gumbel", "Frank") bind_rows(as_tibble(cvselect1$cv) %>% mutate(n = n_loo), as_tibble(cvselect2$cv) %>% mutate(n = nrow(udata))) %>% mutate( family = factor(family, levels = c(1,2,3,4,5), labels = fam_names), Bandwidth = factor(band), n = factor(paste0("n = ", n)) ) %>% ggplot(aes(x = family, y = cv, fill = Bandwidth)) + geom_bar(stat="identity", position = "dodge") + facet_grid(. ~ n) + scale_fill_brewer(palette="Blues", direction=-1) + xlab("") + ylab("CV Likelihood") + theme_bw() + theme(legend.position = "bottom", legend.direction = "horizontal") ## ----localseq2, message=FALSE, fig.align='center', fig.width=7, fig.height=5, fig.cap="Conditional Kendall $\\tau$ estimates under different copula families."---- x0 <- seq(0, 1, by=0.01) tau_est <- sapply(1:5, function(fam_id) { fit <- CondiCopLocFit( u1 = udata[,1], u2 = udata[,2], x = x, x0 = x0, family = fam_id, degree = degree, kernel = kernel, band = band) BiCopEta2Tau(fit$eta, family=fam_id) }) colnames(tau_est) <- fam_names as_tibble(tau_est) %>% mutate( x = x0, True = BiCopEta2Tau(family, eta = eta_fun(x0)) ) %>% pivot_longer(!x, names_to = "Type", values_to = "tau") %>% mutate( Type = factor(Type, levels = c("True", fam_names)) ) %>% ggplot(aes(x = x, y = tau, group = Type)) + geom_line(aes(col = Type, linewidth = Type)) + scale_color_manual( values = c("black", "red", "blue", "brown", "orange", "green4") ) + scale_linewidth_manual(values = c(1, rep(.5, 5))) + ylim(c(0, 1)) + xlab(expression(x)) + ylab(expression(tau(x))) + theme_bw() + theme( legend.position = "bottom", legend.title = element_blank(), )