## ----echo = TRUE, message=FALSE, warning=FALSE-------------------------------- library(graphicalExtremes) library(ggplot2) library(igraph) ## ----include=FALSE, eval=FALSE------------------------------------------------ # rightAlign <- function(txt){ # paste0('
', txt, '
') # } # now <- Sys.time() # knitr::knit_hooks$set(timeit = function(before, options, envir) { # if (before) { # now <<- Sys.time() # } else { # rightAlign(sprintf("\n_Code execution time: %.3f seconds._\n", Sys.time() - now)) # } # }) ## ----include=FALSE------------------------------------------------------------ knitr::opts_chunk$set( collapse = TRUE, comment = '#>', error = TRUE, fig.align = 'center', timeit = TRUE ) theme_set( theme_bw() + theme( plot.background = element_blank(), legend.background = element_blank(), strip.background = element_rect(fill = "white"), plot.caption=element_text( size=7.5, hjust=0, margin=margin(t=15)), text = element_text(size = 11), axis.ticks = element_blank(), panel.grid.major = element_line(size = 0.25) ) ) ## ----fig.width=6, fig.height=3------------------------------------------------ plotFlights(plotConnections = FALSE, map = "world", xyRatio = 2) ## ----fig.width=6, fig.height=4------------------------------------------------ # Get IATAs from Texas cluster IATAs <- getFlightDelayData('IATAs', 'tcCluster') # Plot airports + connections (indicating number of flights by thickness) plotFlights( IATAs, useAirportNFlights = TRUE, useConnectionNFlights = TRUE ) ## ----------------------------------------------------------------------------- # Check whether all dates from the train-test-split are available # (due to size restrictions, the CRAN version of this package does not contain all dates) allDatesAvailable <- tryCatch({ getFlightDelayData('dates', dateFilter = c('tcAll')) TRUE }, error = function(...) FALSE) cat('All dates avilable:', allDatesAvailable, '\n') # Create train and test data sets if(allDatesAvailable){ # Use train-test-split and threshold p from article matEst <- drop(getFlightDelayData('delays', 'tcCluster', 'tcTrain')) matVal <- drop(getFlightDelayData('delays', 'tcCluster', 'tcTest')) p <- 0.95 } else { # Take all available dates that do not contain NAs mat <- drop(getFlightDelayData('delays', 'tcCluster', 'all')) rowHasNA <- apply(is.na(mat), 1, any) mat <- mat[!rowHasNA, ] # Split dates in half splitInd <- floor(nrow(mat)/2) matEst <- mat[1:splitInd,] matVal <- mat[(splitInd+1):nrow(mat),] # Use a lower threshold to compensate for the smaller dataset p <- 0.8 } ## ----------------------------------------------------------------------------- # Compute the empirical extremal correlation matrix emp_chi_mat <- emp_chi(matEst, p = p) # Utility function to plot fitted parameters against true/empirical ones plot_fitted_params <- function(G0, G1, xlab = 'Empirical', ylab = 'Fitted'){ return( ggplot() + geom_point(aes( x = G0[upper.tri(G0)], y = G1[upper.tri(G1)] )) + geom_abline(slope = 1, intercept = 0) + xlab(xlab) + ylab(ylab) ) } ## ----------------------------------------------------------------------------- # Compute undirected flights per connection between selected airports nYears <- dim(flights$flightCounts)[3] flightsPerConnection <- apply(flights$flightCounts[IATAs,IATAs,], c(1, 2), sum) flightsPerConnectionUD <- flightsPerConnection + t(flightsPerConnection) # Make flight graph from adjacency matrix A <- 1 * (flightsPerConnectionUD > nYears * 12) flight_graph <- graph_from_adjacency_matrix(A, diag = FALSE, mode = "undirected") # Plot flight graph plotFlights(IATAs, graph = flight_graph, clipMap = 1.3, xyRatio = 1) ## ----message=FALSE, warning=FALSE--------------------------------------------- # Fit the model model_fit <- fmpareto_graph_HR( data = matEst, graph = flight_graph, p = p, method = "vario" ) # Compute likelihood/ICs flights_loglik_graph <- loglik_HR( data = matVal, p = p, graph = flight_graph, Gamma = model_fit ) cat("Flight graph test-loglikelihood =", round(flights_loglik_graph['loglik'], 2), "\n") # Plot fitted parameters plot_fitted_params(emp_chi_mat, Gamma2chi(model_fit)) ## ----collapse=TRUE, fig.align='', fig.show='hold', out.width='50%'------------ # Fit the model flights_emst_fit <- emst(data = matEst, p = p, method = "vario") # Compute likelihood/ICs flights_loglik_tree <- loglik_HR( data = matVal, p = p, Gamma = flights_emst_fit$Gamma, graph = flights_emst_fit$graph ) cat("Tree test-loglikelihood =", round(flights_loglik_tree['likelihood'], 2), "\n") # Plot fitted graph, parameters plotFlights( IATAs, graph = flights_emst_fit$graph, xyRatio = 1, clipMap = 1.3 ) plot_fitted_params(emp_chi_mat, Gamma2chi(flights_emst_fit$Gamma)) ## ----message=FALSE, warning=FALSE--------------------------------------------- # Fit the model rholist <- seq(0, 0.50, length.out = 11) flights_eglearn_fit <- eglearn(matEst, p = p, rholist = rholist, complete_Gamma = TRUE) # Compute likelihood/ICs flights_loglik <- sapply(seq_along(rholist), FUN = function(j) { loglik_HR( data = matVal, p = p, Gamma = flights_eglearn_fit$Gamma[[j]], graph = flights_eglearn_fit$graph[[j]] ) }) ## ----------------------------------------------------------------------------- ggplot( mapping = aes(x = rholist, y = flights_loglik['loglik', ])) + geom_line() + geom_point(shape = 21, size = 3, stroke = 1, fill = "white") + geom_hline(aes(yintercept = flights_loglik_graph['loglik']), lty = "dashed") + geom_hline(aes(yintercept = flights_loglik_tree['loglik']), lty = "dotted") + xlab("rho") + ylab("Log-likelihood") + scale_x_continuous( breaks = rholist, labels = round(rholist, 3), sec.axis = sec_axis( trans = ~., breaks = rholist, labels = sapply( flights_eglearn_fit$graph, igraph::gsize ), name = "Number of edges" ) ) best_index <- which.max(flights_loglik['loglik',]) cat('Best rho =', rholist[best_index], '\n') cat('Corresponding test-loglikelihood =', flights_loglik['loglik', best_index]) ## ----fig.align='', fig.show='hold', out.width='50%'--------------------------- best_graph <- flights_eglearn_fit$graph[[best_index]] best_Gamma <- flights_eglearn_fit$Gamma[[best_index]] plotFlights(IATAs, graph = best_graph, clipMap = 1.3, xyRatio = 1) plot_fitted_params(emp_chi_mat, Gamma2chi(best_Gamma))