---
title: "API limits and loops"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{API limits and loops}
  %\VignetteEncoding{UTF-8}
  %\VignetteEngine{knitr::rmarkdown}
editor_options: 
  chunk_output_type: console
---

```{r, include = FALSE}
# Chunks that connect to an API are declared with eval = NOT_CRAN, so they
# are not run (and cannot raise errors or warnings) during CRAN checks
NOT_CRAN <- identical(tolower(Sys.getenv("NOT_CRAN")), "true")
knitr::opts_chunk$set(collapse = TRUE, comment = "#>", purl = NOT_CRAN)

# set the keyring backend option to "env"
withr::local_options(list(keyring_backend = "env"))
```

```{r setup}
library(meteospain)
library(sf)
```

The following APIs limit the period of data that can be retrieved in a single query, so it is not possible
to request more than the period predetermined by each API.

## AEMET API

The AEMET API limits daily data downloads to 15 days, and monthly and yearly data downloads to 36 months:

```{r aemet_limit, error=TRUE, eval=NOT_CRAN}
# The AEMET API limits daily queries to 15 days;
# this request asks for a whole year of daily data:
get_meteo_from(
  "aemet",
  aemet_options(
    api_key = keyring::key_get("aemet"),
    resolution = "daily",
    start_date = as.Date("1990-01-01"),
    end_date = as.Date("1990-12-31")
  )
)

# Monthly and yearly queries are limited to 36 months;
# this request asks for 16 years of yearly data for one station:
get_meteo_from(
  "aemet",
  aemet_options(
    api_key = keyring::key_get("aemet"),
    resolution = "yearly",
    start_date = as.Date("2005-01-01"),
    end_date = as.Date("2020-12-31"),
    stations = "0149X"
  )
)
```

This means that with one call to `get_meteo_from` to the AEMET service, one can only download 15 days of
daily data (or 36 months of monthly and yearly data).  
If the period needed is longer than that, one option is to perform all the necessary calls and join the
results:

```{r manual, eval=NOT_CRAN}
# first request: 1990-01-01 to 1990-01-15
res_1990_jan_1 <- get_meteo_from(
  "aemet",
  aemet_options(
    api_key = keyring::key_get("aemet"),
    resolution = "daily",
    start_date = as.Date("1990-01-01"),
    end_date = as.Date("1990-01-15")
  )
)

# second request: 1990-01-16 to 1990-01-31
res_1990_jan_2 <- get_meteo_from(
  "aemet",
  aemet_options(
    api_key = keyring::key_get("aemet"),
    resolution = "daily",
    start_date = as.Date("1990-01-16"),
    end_date = as.Date("1990-01-31")
  )
)

# join the results of both requests and print them
res_1990_jan <- rbind(res_1990_jan_1, res_1990_jan_2)
res_1990_jan
```

While this is easy to do for short periods, for long periods (years, decades) it becomes tedious and
error-prone (or at least involves a lot of *copy&paste* and generates longer scripts).  
To avoid this, we can use loops, either in a tidyverse way (`purrr::map`) or with a more classic approach (`for`).
In both cases, the first thing to do is to create the vectors of dates to retrieve:

```{r dates_vecs}
# Build the start and end dates of consecutive 15-day periods
start_dates <- seq(
  from = as.Date("1990-01-01"), to = as.Date("1990-07-01"), by = "15 days"
)
end_dates <- seq(
  from = as.Date("1990-01-15"), to = as.Date("1990-07-15"), by = "15 days"
)

# every start date needs its matching end date, so the lengths must be equal
length(start_dates) == length(end_dates)

# show the resulting periods side by side
data.frame(start_dates, end_dates)
```

### tidyverse loop

We are going to use `purrr::map2` to iterate over both date vectors at the same time, and
`purrr::list_rbind` to combine all the results into a single data frame:

```{r tidyverse_loop, eval=NOT_CRAN}
# tidyverse map: one request per (start, end) pair, then bind all the pieces
res_tidyverse <-
  purrr::map2(
    start_dates, end_dates,
    \(start_date, end_date) {
      get_meteo_from(
        "aemet",
        aemet_options(
          api_key = keyring::key_get("aemet"),
          resolution = "daily",
          start_date = start_date,
          end_date = end_date
        )
      )
    }
  ) |>
  purrr::list_rbind()

head(res_tidyverse)
```

### for loop

We use a base R `for` loop, iterating over the indices of the date vectors:

```{r for_loop, eval=NOT_CRAN}
# base for loop
# Each period's result is stored in a pre-allocated list and everything is
# bound once at the end. Seeding the accumulator with an empty data.frame()
# would mix classes in rbind() (its internal dispatch falls back to the
# default method when the arguments resolve to different methods), and
# growing the result inside the loop copies the accumulated data every time.
res_list <- vector("list", length(start_dates))

for (index in seq_along(start_dates)) {
  res_list[[index]] <- get_meteo_from(
    'aemet',
    aemet_options(
      api_key = keyring::key_get('aemet'),
      resolution = 'daily',
      start_date = start_dates[index],
      end_date = end_dates[index]
    )
  )
}

# bind all the periods together; every element has the same class
res_for <- do.call(rbind, res_list)

head(res_for)
```


Both methods return identical results:

```{r identical, eval=NOT_CRAN}
# check that the purrr result and the for loop result are exactly the same
# object (identical() returns a single TRUE or FALSE)
identical(res_tidyverse, res_for)
```

  > In a loop, whether it is a `purrr::map` or a `for` loop, each iteration connects to the API, consuming
  connections from the user quota. Take this into consideration when creating loops for longer periods, as you
  can reach your API request limits for the day/month... (it depends on the service API).

## MeteoCat API

When using MeteoCat with the `daily`, `monthly` and `yearly` resolutions, there are restrictions on the
period that can be accessed.

### `daily`

`daily` always returns the whole month the selected date is in, e.g. for `start_date = as.Date('2020-04-10')`
it will return all days in April 2020:
  
```{r meteocat_daily, eval=NOT_CRAN}
# daily resolution, asking for a single date in April 2020
api_options <- meteocat_options(
  resolution = "daily",
  start_date = as.Date("2020-04-10"),
  api_key = keyring::key_get("meteocat")
)
april_2020 <- get_meteo_from("meteocat", api_options)

# distinct timestamps present in the data returned
unique(april_2020$timestamp)
```

This means that if we want more than one month, we need to use loops in a similar way as described previously
for AEMET:

```{r meteocat_daily_loops, eval=NOT_CRAN}
# one start date per month, from January to April 2020
start_dates <- seq(
  from = as.Date("2020-01-01"), to = as.Date("2020-04-01"), by = "months"
)

# tidyverse map: one request per month, then bind all the pieces
meteocat_2020q1_tidyverse <-
  purrr::map(
    start_dates,
    \(start_date) {
      get_meteo_from(
        "meteocat",
        meteocat_options(
          api_key = keyring::key_get("meteocat"),
          resolution = "daily",
          start_date = start_date
        )
      )
    }
  ) |>
  purrr::list_rbind()

head(meteocat_2020q1_tidyverse)

# base for loop
# Each month's result is stored in a pre-allocated list and everything is
# bound once at the end. Seeding the accumulator with an empty data.frame()
# would mix classes in rbind() (its internal dispatch falls back to the
# default method when the arguments resolve to different methods), and
# growing the result inside the loop copies the accumulated data every time.
meteocat_2020q1_list <- vector("list", length(start_dates))

for (index in seq_along(start_dates)) {
  meteocat_2020q1_list[[index]] <- get_meteo_from(
    'meteocat',
    meteocat_options(
      api_key = keyring::key_get('meteocat'),
      resolution = 'daily',
      start_date = start_dates[index]
    )
  )
}

# bind all the months together; every element has the same class
meteocat_2020q1_for <- do.call(rbind, meteocat_2020q1_list)

head(meteocat_2020q1_for)

# check that both approaches give the same result
identical(meteocat_2020q1_tidyverse, meteocat_2020q1_for)
```

### `monthly`

`monthly` always returns the whole year the selected date is in, e.g. for `start_date = as.Date('2020-04-10')`
it will return all months in 2020:
  
```{r meteocat_monthly, eval=NOT_CRAN}
# monthly resolution, asking for a single date in 2020
api_options <- meteocat_options(
  resolution = "monthly",
  start_date = as.Date("2020-04-10"),
  api_key = keyring::key_get("meteocat")
)
year_2020 <- get_meteo_from("meteocat", api_options)

# distinct timestamps present in the data returned
unique(year_2020$timestamp)
```

Which means that if we need more than one year of monthly data, we need to use loops again:

```{r meteocat_monthly_loops, eval=NOT_CRAN}
# one start date per year, for 2019 and 2020
start_dates <- seq(
  from = as.Date("2019-01-01"), to = as.Date("2020-01-01"), by = "years"
)

# tidyverse map: one request per year, then bind all the pieces
meteocat_2019_20_tidyverse <-
  purrr::map(
    start_dates,
    \(start_date) {
      get_meteo_from(
        "meteocat",
        meteocat_options(
          api_key = keyring::key_get("meteocat"),
          resolution = "monthly",
          start_date = start_date
        )
      )
    }
  ) |>
  purrr::list_rbind()

head(meteocat_2019_20_tidyverse)

# base for loop
# Each year's result is stored in a pre-allocated list and everything is
# bound once at the end. Seeding the accumulator with an empty data.frame()
# would mix classes in rbind() (its internal dispatch falls back to the
# default method when the arguments resolve to different methods), and
# growing the result inside the loop copies the accumulated data every time.
meteocat_2019_20_list <- vector("list", length(start_dates))

for (index in seq_along(start_dates)) {
  meteocat_2019_20_list[[index]] <- get_meteo_from(
    'meteocat',
    meteocat_options(
      api_key = keyring::key_get('meteocat'),
      resolution = 'monthly',
      start_date = start_dates[index]
    )
  )
}

# bind all the years together; every element has the same class
meteocat_2019_20_for <- do.call(rbind, meteocat_2019_20_list)

head(meteocat_2019_20_for)

# check that both approaches give the same result
identical(meteocat_2019_20_tidyverse, meteocat_2019_20_for)
```

### `yearly`

`yearly` always returns all available years and the `start_date` argument is ignored, e.g. using
`start_date = as.Date('2020-04-10')` will return all years, regardless of the date supplied:
  
```{r meteocat_yearly, eval=NOT_CRAN}
# yearly resolution, asking for a single date in 2020
api_options <- meteocat_options(
  resolution = "yearly",
  start_date = as.Date("2020-04-10"),
  api_key = keyring::key_get("meteocat")
)
all_years <- get_meteo_from("meteocat", api_options)

# distinct timestamps present in the data returned
unique(all_years$timestamp)
```

This means that with `yearly` we always get all the available data, so there is no need for loops.