## ----setup, include = FALSE---------------------------------------------- source("setup/setup.R") ## ----zoom---------------------------------------------------------------- # library(dm) # library(dplyr) # # flights_dm <- dm_nycflights13(cycle = TRUE) # flights_dm # flights_keyed <- # flights_dm %>% # dm_get_tables(keyed = TRUE) # # # The print output for a `dm_keyed_tbl` looks very much like that from a normal # # `tibble`, with additional details about keys. # flights_keyed$flights # flights_tbl_mutate <- # flights_keyed$flights %>% # mutate(am_pm_dep = if_else(dep_time < 1200, "am", "pm"), .after = dep_time) # # flights_tbl_mutate ## ----zoom2--------------------------------------------------------------- # updated_flights_dm <- dm( # flights = flights_tbl_mutate, # !!!flights_keyed[c("airlines", "airports", "planes", "weather")] # ) # # # The only difference in the `dm` print output is the increased number of # # columns # updated_flights_dm # # The schematic view of the data model remains unchanged # dm_draw(updated_flights_dm) ## ------------------------------------------------------------------------ # library(tidyr) # # flights_keyed$weather # # # Maybe there is some hidden candidate for a primary key that we overlooked? # enum_pk_candidates(flights_keyed$weather) # # Seems we have to construct a column with unique values # # This can be done by combining column `origin` with `time_hour`, if the latter # # is converted to a single time zone first; all within the `dm`: # weather_tbl_mutate <- # flights_keyed$weather %>% # # first convert all times to the same time zone: # mutate(time_hour_fmt = format(time_hour, tz = "UTC")) %>% # # paste together as character the airport code and the time # unite("origin_slot_id", origin, time_hour_fmt) %>% # select(origin_slot_id, everything()) # # # check if we the result is as expected: # weather_tbl_mutate %>% # enum_pk_candidates() %>% # filter(candidate) # # We apply the same transformation to create # # the foreign key in the flights table: # flights_tbl_mutate <- # flights_keyed$flights %>% # mutate(time_hour_fmt = format(time_hour, tz = "UTC")) %>% # unite("origin_slot_id", origin, time_hour_fmt) %>% # select(origin_slot_id, everything()) # # surrogate_flights_dm <- # dm( # weather = weather_tbl_mutate, # flights = flights_tbl_mutate, # !!!flights_keyed[c("airlines", "airports", "planes")] # ) %>% # dm_add_pk(weather, origin_slot_id) %>% # dm_add_fk(flights, origin_slot_id, weather) # # surrogate_flights_dm %>% # dm_draw() ## ------------------------------------------------------------------------ # disentangled_flights_dm <- # dm( # destination = flights_keyed$airports, # origin = flights_keyed$airports, # !!!flights_keyed[c("flights", "airlines", "planes", "weather")] # ) %>% # # Key relations are also duplicated, so the wrong ones need to be removed # dm_rm_fk(flights, dest, origin) %>% # dm_rm_fk(flights, origin, destination) # # disentangled_flights_dm %>% # dm_draw() ## ------------------------------------------------------------------------ # flights_derived <- # flights_dm %>% # pull_tbl(flights, keyed = TRUE) %>% # dplyr::count(origin, carrier) # # derived_flights_dm <- dm(flights_derived, !!!flights_keyed) # # derived_flights_dm %>% # dm_draw() ## ------------------------------------------------------------------------ # planes_for_join <- # flights_keyed$planes %>% # select(tailnum, plane_type = type) # # joined_flights_tbl <- # flights_keyed$flights %>% # # let's first reduce the number of columns of flights # select(-dep_delay:-arr_delay, -air_time:-minute, -starts_with("sched_")) %>% # # in the {dm}-method for the joins you can specify which columns you want to # # add to the subsetted table # left_join(planes_for_join) # # joined_flights_dm <- dm( # flights_plane_type = joined_flights_tbl, # !!!flights_keyed[c("airlines", "airports", "weather")] # ) # # # this is how the table looks now # joined_flights_dm$flights_plane_type # # also here, the FK-relations are transferred to the new table # joined_flights_dm %>% # dm_draw() ## ------------------------------------------------------------------------ # dm <- dm_nycflights13() # dm_deconstruct(dm)