## ----setup, include = FALSE--------------------------------------------------- knitr::opts_chunk$set(echo = FALSE, collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE ) options(rmarkdown.html_vignette.check_title = FALSE) ## ----include score integration image, echo=FALSE, out.width = '85%', out.height = '85%', fig.cap = 'Figure 1. Graphic representation of how measured soil properties are aggregated to scores.'---- # include graphic knitr::include_graphics('../vignettes/OBIC_score_integratie_2.png') ## ----load binnenveld---------------------------------------------------------- # load packages library(OBIC); library(data.table); library(ggplot2); library(patchwork) setDTthreads(1) # load data dt <- OBIC::binnenveld[ID==1] ## ----reformatting code, echo = TRUE, eval=FALSE------------------------------- # # Step 3 Reformat dt given weighing per indicator and prepare for aggregation ------------------ # # # load weights.obic (set indicator to zero when not applicable) # w <- as.data.table(OBIC::weight.obic) # # # Add years per field # dt[,year := 1:.N, by = ID] # # # Select all indicators used for scoring # cols <- colnames(dt)[grepl('I_C|I_B|I_P|I_E|I_M|year|crop_cat|SOILT',colnames(dt))] # # # Melt dt and assign main categories for OBI # dt.melt <- melt(dt[,mget(cols)], # id.vars = c('B_SOILTYPE_AGR','crop_category','year'), # variable.name = 'indicator') # # # add categories relevant for aggregating # # C = chemical, P = physics, B = biological, BCS = visual soil assessment # # indicators not used for integrating: IBCS and IM # dt.melt[,cat := tstrsplit(indicator,'_',keep = 2)] # dt.melt[grepl('_BCS$',indicator) & indicator != 'I_BCS', cat := 'IBCS'] # dt.melt[grepl('^I_M_',indicator), cat := 'IM'] # # # Determine number of indicators per category # dt.melt.ncat <- dt.melt[year==1 & !cat %in% c('IBCS','IM')][,list(ncat = .N),by='cat'] # # # add weighing factor to indicator values # dt.melt <- merge(dt.melt,w[,list(crop_category,indicator,weight_nonpeat,weight_peat)], # by = c('crop_category','indicator'), all.x = TRUE) # # # calculate correction factor for indicator values (low values have more impact than high values, a factor 5) # dt.melt[,cf := cf_ind_importance(value)] # # # calculate weighted value for crop category # dt.melt[,value.w := value] # dt.melt[grepl('veen',B_SOILTYPE_AGR) & weight_peat < 0,value.w := -999] # dt.melt[!grepl('veen',B_SOILTYPE_AGR) & weight_nonpeat < 0,value.w := -999] # ## ----eval = FALSE, include = FALSE-------------------------------------------- # # YF: I think this paragraph is not necessary # # After reformatting the data in step 3, an indicator data.table is created in step 4. This data.table uses the soil function scores adjusted for their applicability for the soiltype and crop category. In step 4 indicators are calculated to display them as output. In step 5 the total OBI score is calculated, since part of step 5 overlaps with step 4, we only discuss step 5 onwards. ## ----echo = TRUE, eval=FALSE-------------------------------------------------- # # Step 5 Add scores ------------------ # # # subset dt.melt for relevant columns only # out.score <- dt.melt[,list(cat, year, cf, value = value.w)] # # # remove indicator categories that are not used for scoring # out.score <- out.score[!cat %in% c('IBCS','IM','BCS')] ## ----echo = TRUE, eval=FALSE-------------------------------------------------- # # calculate weighted average per indicator category # out.score <- out.score[,list(value = sum(cf * pmax(0,value) / sum(cf[value >= 0]))), by = list(cat,year)] # # # for case that a cat has one indicator or one year and has NA # out.score[is.na(value), value := -999] ## ----output year cf----------------------------------------------------------- # create data y <- 1:11 cf <- log(12 - pmin(10, y)) cat(round(cf, 3)) ## ----echo = TRUE, eval=FALSE-------------------------------------------------- # # # calculate correction factor per year; recent years are more important # out.score[,cf := log(12 - pmin(10,year))] # # # calculate weighted average per indicator category per year # out.score <- out.score[,list(value = sum(cf * pmax(0,value)/ sum(cf[value >= 0]))), by = cat] ## ----echo = TRUE, eval=FALSE-------------------------------------------------- # # merge out with number per category # out.score <- merge(out.score,dt.melt.ncat, by='cat') ## ----eval = FALSE, include = FALSE-------------------------------------------- # # this text is probably wrong # # Furthermore, by aggregating indicators to categories and then to a score rather than directly from indicators to a score; individual indicators from categories with few underlying indicators, affect the holistic score more than indicators in categories with many indicators. For example, if there is one biological indicator, its weight in affecting the holistic score is log(1+1)= `r round(log(1+1),2)`, while a indicator within a chemical indicator with nine indicators individually only weighs log(9+1)/9= `r round(log(9+1)/9,2)`. While on category level, biology only weighs `r round(log(1+1),2)` and chemical `r round(log(9+1),2)`. ## ----echo = TRUE, eval=FALSE-------------------------------------------------- # # calculate weighing factor depending on number of indicators # out.score[,cf := log(ncat + 1)] # # calculated final obi score # out.score <- rbind(out.score[,list(cat,value)], # out.score[,list(cat = "T",value = sum(value * cf / sum(cf)))]) ## ----echo = TRUE, eval=FALSE-------------------------------------------------- # # update element names # out.score[,cat := paste0('S_',cat,'_OBI_A')] # out.score[, value := round(value,3)] ## ----make mock data, eval= TRUE----------------------------------------------- # make mock data and calculate scores with different aggregation methods (averaging without weight and averaging with linearly changing weight) # data like: # soil_function_value|indicator|group|year|cf_base|cf_noweight|cf_linearweight|score_base|score_noweight|score_linearweight # visualise differences # make veldnr fieldid <- 1 # define standard deviation std <- 0.2 # make indicator inds <- c('I_C_CEC', 'I_C_CU', 'I_C_K', 'I_C_MG', 'I_C_N', 'I_C_P', 'I_C_PH', 'I_C_S', 'I_C_ZN', 'I_B_DI', 'I_B_SF', 'I_E_NGW', 'I_E_NSW', 'I_M', 'I_P_CEC', 'I_P_CO', 'I_P_CR', 'I_P_DS', 'I_P_DU', 'I_P_SE', 'I_P_WRI', 'I_P_WS') # make jaar year <- 1:10 # combine in dt dt <- data.table(field = sort(rep(fieldid,length(inds)*length(year))), indicator = sort(rep(inds, length(year)*length(fieldid))), year = rep(year, length(inds)*length(fieldid)) ) # add category dt <- dt[,cat := tstrsplit(indicator,'_',keep = 2)] # iteratively add fields dto <- data.table(field = NULL, indicator = NULL, year = NULL) for(i in 1:100){ dtn <- dt dtn <- dtn[,field := i] dto <- rbindlist(list(dto, dtn)) } # dto is a almost ready set of 100 fields, only values and value description need to be added set.seed(222) # helper function to make random values within 0:1 rtnorm <- function(n, mean = 0, sd = 1, min = 0, max = 1) { bounds <- pnorm(c(min, max), mean, sd) u <- runif(n, bounds[1], bounds[2]) qnorm(u, mean, sd) } # make baseline dt1 <- copy(dto) dt1 <- dt1[,field := field+100-1] dt1 <- dt1[,treatment := 'baseline'] dt1 <- dt1[,value := rtnorm(n = nrow(dt1),mean = 0.7, sd = std)] # make treatment where c = 0.3 dt2 <- copy(dto) dt2 <- dt2[,field := field+200-1] dt2 <- dt2[,treatment := 'low C values'] dt2 <- dt2[cat == 'C',value := rtnorm(n = nrow(dt2[cat=='C']),mean = 0.3, sd = std)] dt2 <- dt2[!cat == 'C',value := rtnorm(n = nrow(dt2[!cat=='C']),mean = 0.7, sd = std)] # make treatment where B = 0.3 dt3 <- copy(dto) dt3 <- dt3[,field := field+300-1] dt3 <- dt3[,treatment := 'low B values'] dt3 <- dt3[cat == 'B',value := rtnorm(n = nrow(dt3[cat=='B']),mean = 0.3, sd = std)] dt3 <- dt3[!cat == 'B',value := rtnorm(n = nrow(dt3[!cat=='B']),mean = 0.7, sd = std)] # make treatment where one C indicator = 0 dt4 <- copy(dto) dt4 <- dt4[,field := field+400-1] dt4 <- dt4[,treatment := 'one low C'] dt4 <- dt4[indicator == 'I_C_CEC',value := 0] dt4 <- dt4[!indicator == 'I_C_CEC',value := rtnorm(n = nrow(dt4[!indicator == 'I_C_CEC']),mean = 0.7, sd = std)] # make where one B indicator = 0 dt5 <- copy(dto) dt5 <- dt5[,field := field+500-1] dt5 <- dt5[,treatment := 'one low B'] dt5 <- dt5[indicator == 'I_B_DI',value := 0] dt5 <- dt5[!indicator == 'I_B_DI',value := rtnorm(n = nrow(dt5[!indicator == 'I_B_DI']),mean = 0.7, sd = std)] # make treatment where recent years score low and old years high dt6 <- copy(dto) dt6 <- dt6[,field := field+600-1] dt6 <- dt6[,treatment := 'Recent years low'] dt6 <- dt6[year %in% 1:5, value := rtnorm(n = nrow(dt6[year %in% 1:5]), mean = 0.3, sd = std)] dt6 <- dt6[!year %in% 1:5, value := rtnorm(n = nrow(dt6[!year %in% 1:5]), mean = 0.7, sd = std)] # make treatment where recent years score high and old years low dt7 <- copy(dto) dt7 <- dt7[,field := field+700-1] dt7 <- dt7[,treatment := 'Recent years high'] dt7 <- dt7[!year %in% 1:5, value := rtnorm(n = nrow(dt7[!year %in% 1:5]), mean = 0.3, sd = std)] dt7 <- dt7[year %in% 1:5, value := rtnorm(n = nrow(dt7[year %in% 1:5]), mean = 0.7, sd = std)] # combine all data dta <- rbindlist(list(dt1, dt2, dt3, dt4, dt5, dt6, dt7)) # make sure all values are between 0 and 1 if(any(dta$value >1|dta$value<0)){cat('values outside acceptable bounds')} ## ----plot orignal values histogram,fig.width = 7, fig.height = 4,fig.fullwidth = TRUE, fig.cap = 'Figure 5. Distribution of indicator values per scenario as histogram'---- ggplot(dta, aes(x = value, fill = cat)) + geom_histogram(bins = 40) + theme_bw() +facet_wrap(~treatment, ncol = 4) ## ----table with mock data before aggregation---------------------------------- # get relevant data from dta dtat <- dta[, mean(value), by = c( 'cat', 'treatment')] # rounc V1 dtat <- dtat[, V1 := round(V1, digits = 3)] # improve category descrition dtat <- dtat[,cat := paste('mean', cat)] # dcast dtat <- dcast(dtat ,treatment~cat, value.var = 'V1') # factorise and order treatment dtat <- dtat[, treatment := factor(treatment, levels = c('baseline', 'low B values', 'low C values', 'one low B', 'one low C', 'Recent years high', 'Recent years low'))] # order dtat setorder(dtat) # make table knitr::kable(dtat, caption = 'Mean scores per category for each scenario') ## ----plot mock cf, fig.width = 7, fig.height = 3,fig.fullwidth = TRUE, fig.cap = 'Figure 2. Correction factors calculated with linear or logarithmic methods per aggregation step.'---- # plot correction factors pdtlog <- data.table(x = c(seq(0,1,0.1),rep(0:10,2)), cf_type = c(rep('value',11),rep('year',11), rep('ncat', 11))) # calc cf's log pdtlog <- pdtlog[cf_type == 'value', cf := OBIC::cf_ind_importance(x)] pdtlog <- pdtlog[cf_type == 'year', cf := log(12 - pmin(10,x))] pdtlog <- pdtlog[cf_type == 'ncat', cf := log(x + 1)] pdtlog[,cf_method := 'log'] # calc cf's linear pdtlin <- data.table(x = c(seq(0,1,0.1),rep(0:10,2)), cf_type = c(rep('value',11),rep('year',11), rep('ncat', 11))) pdtlin <- pdtlin[cf_type == 'value', cf := 5-4.17*x] pdtlin <- pdtlin[cf_type == 'year', cf := 2.59-0.19*x] pdtlin <- pdtlin[cf_type == 'ncat', cf := x*log(11)/10] pdtlin[,cf_method := 'linear'] # combine pdt <- rbindlist(list(pdtlog, pdtlin)) # format pdt pdt <- pdt[,cf_type := factor(cf_type, levels = c('value', 'ncat', 'year'))] # plot gg <- ggplot(pdt, aes(x = x, y = cf, color = cf_method, group = cf_type))+ geom_point() + theme_bw() + facet_wrap(~cf_type, ncol = 3, scales = 'free') + scale_colour_viridis_d()+ xlab('') + ylab('cf (weight)') # plot a line in each for(i in 1:uniqueN(pdt$cf_method)){ gg <- gg + geom_line(data = pdt[cf_method == unique(pdt$cf_method)[i]], color = c('#FDE725FF', '#440154FF')[i]) } # plot gg gg ## ----calc correction factors-------------------------------------------------- # Use three ways to calc correction factors (giving weight to each value), log (standard in OBIC), lin (linearly increasing/decreasing), non (everything has the same weight) # value correction factors ====== dt <- copy(dta) # function to add correction factors addcf <- function(dt){ # copy input dt.int <- copy(dt) # add correction factor for indicator value dt.int[,log := OBIC::cf_ind_importance(value)] dt.int[,lin := 5-4.17*value] dt.int[,non := 1] # melt dt by cf method dt.int <- melt(dt.int, measure.vars = c('log', 'lin', 'non'), value.name = 'v_cf', variable.name = 'cf_method') # calculate cf for cat ==== dt.int[,ncat := .N,by=c('field','year','cat','cf_method')] # add correction factor for number of categories dt.int[cf_method == 'log',c_cf := log(ncat + 1)] dt.int[cf_method == 'lin',c_cf := ncat*log(10 + 1)/10] dt.int[cf_method == 'non',c_cf := 1] # dd correction factor for number of years dt.int[cf_method == 'log',y_cf := log(12 - pmin(10,year))] dt.int[cf_method == 'lin',y_cf := 2.59-0.19*year] dt.int[cf_method == 'non',y_cf := 1] } # add correction factors dt <- addcf(dt) ## ----calc correction factors 2------------------------------------------------ # Use three ways to calc correction factors (giving weight to each value), log (standard in OBIC), lin (linearly increasing/decreasing), non (everything has the same weight) # for log and lin, 4 scenarios are added (c_cf only, v_cf only, y-cf only, or all) # value correction factors ====== dtp <- copy(dta) # function to add correction factors addcf2 <- function(dt){ # copy input dt.int <- copy(dt) # add correction factor for indicator value dt.int[,log_all := OBIC::cf_ind_importance(value)] dt.int[,log_vcf := OBIC::cf_ind_importance(value)] dt.int[,log_ccf := 1] dt.int[,log_ycf := 1] dt.int[,lin_all := 5-4.17*value] dt.int[,lin_vcf := 5-4.17*value] dt.int[,lin_ccf := 1] dt.int[,lin_ycf := 1] dt.int[,non := 1] # melt dt by cf method dt.int <- melt(dt.int, measure.vars = c('log_all', 'log_vcf', 'log_ccf', 'log_ycf', 'lin_all', 'lin_vcf', 'lin_ccf', 'lin_ycf', 'non'), value.name = 'v_cf', variable.name = 'cf_method') # calculate cf for cat ==== dt.int[,ncat := .N,by=c('field','year','cat','cf_method')] # add correction factor for number of categories dt.int[cf_method == 'log_all',c_cf := log(ncat + 1)] dt.int[cf_method == 'log_ccf',c_cf := log(ncat + 1)] dt.int[cf_method == 'log_vcf',c_cf := 1] dt.int[cf_method == 'log_ycf',c_cf := 1] dt.int[cf_method == 'lin_all',c_cf := ncat*log(10 + 1)/10] dt.int[cf_method == 'lin_ccf',c_cf := ncat*log(10 + 1)/10] dt.int[cf_method == 'lin_vcf',c_cf := 1] dt.int[cf_method == 'lin_ycf',c_cf := 1] dt.int[cf_method == 'non',c_cf := 1] # dd correction factor for number of years dt.int[cf_method == 'log_all',y_cf := log(12 - pmin(10,year))] dt.int[cf_method == 'log_ycf',y_cf := log(12 - pmin(10,year))] dt.int[cf_method == 'log_vcf',y_cf := 1] dt.int[cf_method == 'log_ccf',y_cf := 1] dt.int[cf_method == 'lin_all',y_cf := 2.59-0.19*year] dt.int[cf_method == 'lin_ycf',y_cf := 2.59-0.19*year] dt.int[cf_method == 'lin_vcf',y_cf := 1] dt.int[cf_method == 'lin_ccf',y_cf := 1] dt.int[cf_method == 'non',y_cf := 1] return(dt.int) } # add correction factors dt3 <- addcf2(dtp) ## ----aggregate scores--------------------------------------------------------- # make function to aggregate scores aggscores <- function(dt) { # copy input dt.agg <- copy(dt) # calculate weighted value per category and year dt.agg <- dt.agg[,list(w.value = sum(v_cf* pmax(0,value) / sum(v_cf[value >= 0])), y_cf = mean(y_cf), c_cf = mean(c_cf)), by = .(treatment,field, cf_method, cat, year)] # calculated weighted average value per category (so mean over years) dt.agg <- dt.agg[,list(wy.value = sum(y_cf * pmax(0, w.value) / sum(y_cf[w.value >= 0])), c_cf = mean(c_cf)), by = .(treatment,field, cf_method, cat)] # calculated weighted average value per field (so mean over categories) dt.agg.tot <- dt.agg[,list(value = sum(wy.value * c_cf / sum(c_cf))), by = .(treatment,field, cf_method)] # output dt.out <- rbind(dt.agg[,.(treatment,field,cf_method,cat,value = wy.value)], dt.agg.tot[,.(treatment,field,cf_method,cat='total',value)]) } # add scores to dt dt.out <- aggscores(dt) dt3.out <- aggscores(dt3) ## ----orig score funs of brent, eval=TRUE-------------------------------------- aggscores_brent <- function(dt) { # copy input dt.agg <- copy(dt) # calculate weighted value per category and year dt.agg <- dt.agg[,w.value := sum(v_cf* pmax(0,value) / sum(v_cf[value >= 0])), by = .(field, cf_method, cat, year)] # calculated weighted average value per category (so mean over years) dt.agg <- dt.agg[,wy.value := sum(y_cf * pmax(0, w.value) / sum(y_cf[w.value >= 0])), by = .(field, cf_method, cat)] # scores per category # calculate total obi score (log method) dt.agg <- dt.agg[,S_T := sum(wy.value * c_cf / sum(c_cf)), by = .(field, cf_method)] # calculate total obi score (lin method) dt.agg[, c_cf_lin := ncat*log(10 + 1)/10] dt.agg <- dt.agg[,S_T_catlin := sum(wy.value * c_cf_lin / sum(c_cf_lin)), by = .(field, cf_method)] # calculate total obi score (non method) dt.agg[, c_cf_non := 1] dt.agg <- dt.agg[,S_T_catnon := sum(wy.value * c_cf_non / sum(c_cf_non)), by = .(field, cf_method)] # calculate total obi score if not aggregated by cat dt.agg <- dt.agg[, nocat.value := sum(v_cf* pmax(0,value) / sum(v_cf[value >= 0])), by = .(field, cf_method, year)] dt.agg <- dt.agg[, S_T_nocat := sum(y_cf * pmax(0, nocat.value) / sum(y_cf[nocat.value >= 0])), by = .(field, cf_method)] # select data for scores dts <- unique(dt.agg[,.(field, indicator,cat, wy.value, S_T ,treatment, cf_method, S_T_nocat, S_T_catlin, S_T_catnon)]) # reshape dts so total scores are in same column as cat scores (with T being a cat) dts1 <- unique(dts[,.(field, cat, wy.value, treatment, cf_method)]) dts2 <- unique(dts[,.(field, S_T, treatment, cf_method)]) dts3 <- unique(dts[,.(field, S_T_nocat, treatment, cf_method)]) dts4 <- unique(dts[,.(field, S_T_catlin, treatment, cf_method)]) dts5 <- unique(dts[,.(field, S_T_catnon, treatment, cf_method)]) # rename cols setnames(dts1, 'wy.value', 'score') setnames(dts2, 'S_T', 'score') setnames(dts3, 'S_T_nocat', 'score') setnames(dts4, 'S_T_catlin', 'score') setnames(dts5, 'S_T_catnon', 'score') # add cat column to dts2 dts2$cat <- 'T_cf_log' dts3$cat <- 'T_nocat' dts4$cat <- 'T_cf_lin' dts5$cat <- 'T_cf_non' # bind scores dt's dt.agg <- rbindlist(list(dts1, dts2, dts3, dts4, dts5), use.names = TRUE) # update element names dt.agg[,cat := paste0('S_',cat)] dt.agg[, score := round(score,3)] # factorise cat and cf_method dt.agg <- dt.agg[, cat := factor(cat, levels = c('S_T_cf_log', 'S_C', 'S_P', 'S_B', 'S_E','S_M', "S_T_cf_lin", "S_T_cf_non", "S_T_nocat"))] dt.agg <- dt.agg[, cf_method := factor(cf_method, levels = c('log', 'lin', 'non'))] } dt2 <- aggscores_brent(dt) ## ----plot baselines scores, fig.width = 7, fig.height = 7,fig.fullwidth = TRUE, fig.cap = 'Figure 3. Total OBI score boxplots per aggregation method for each scenario.'---- # arithmetric mean of all indicators (this should be equal to lin_ccf?) arimean <- dt[cf_method == "non", arimean := mean(value), by = c("field", "treatment")] arimean <- dta[,arimean := mean(value), by = c("field", "treatment")] arimean2 <- unique(arimean[, .(field, treatment, arimean)]) arimean3 <- arimean2[, median(arimean), by = treatment] # plot ggplot(dt.out[cat == 'total'], aes(x = value, y = cf_method)) + geom_vline(data = arimean3, aes(xintercept = V1), col = "red", lty = 2) + geom_boxplot() + theme_bw() + scale_colour_viridis_d() + scale_y_discrete(limits = rev) + coord_cartesian(xlim = c(0,1)) + facet_wrap(~treatment, ncol = 1) ## ----OBI score summary table-------------------------------------------------- # get relevant data from dt dtt <- dt.out[, list(value = round(mean(value),3)), by = c( 'cat', 'cf_method', 'treatment')] # improve category discretion dtt <- dtt[,ct := paste('mean', cat)] # dcast dtt <- dcast(dtt ,treatment+cf_method~cat, value.var = 'value') # factorise and order treatment dtt <- dtt[, treatment := factor(treatment, levels = c('baseline', 'low B values', 'low C values', 'one low B', 'one low C', 'Recent years high', 'Recent years low'))] # order setorder(dtt, treatment, cf_method) # make table knitr::kable(dtt, caption = 'Mean scores per category and total per aggregation method') # make dtt version with just total scores to use for in text reporting dttt <- dtt[,.(treatment, cf_method, S_T_OBI_A = total)] ## ----plot original values, eval=FALSE, fig.width = 7, fig.height = 20,fig.fullwidth = TRUE, fig.cap = 'Figure 4. Distribution of indicator values per scenario.'---- # # factorise dta cat levels # # dta <- dta[, cat := factor(cat, levels = c('C', 'P', 'B', 'E'))] # # dta <- dta[, indicator := factor(indicator, levels = c("I_C_CEC","I_C_CU", "I_C_K", "I_C_MG", "I_C_N", "I_C_P", "I_C_PH", "I_C_S", "I_C_ZN", # # "I_P_CR", "I_P_DS", "I_P_DU", "I_P_SE", "I_P_WRI", "I_P_WS","I_P_CEC","I_P_CO", # # "I_B_DI", "I_B_SF", "I_E_NGW","I_E_NSW"))] # # # # # plot # # ggplot(dta, aes(x = value, y = indicator, color = cat))+ # # geom_boxplot() + # # theme_bw() + coord_cartesian(xlim = c(0,1)) + scale_y_discrete(limits = rev)+ # # facet_wrap(~treatment, ncol = 1) # ## ----plot baselines scores zoomed in, fig.width = 7, fig.height = 2,fig.fullwidth = TRUE, fig.cap = 'Figure 4. Total OBI score per aggregation method for baseline sceinario.'---- # plot (different colors per method, 'lin' and 'log' are divided into 4 variation) dt3.out[, method_main := substr(cf_method, 1, 3)] dt3.out[, alpha := 1] dt3.out[!grepl("_all|non", cf_method), alpha := 0.5] ggplot(dt3.out[cat == 'total' & treatment =="baseline"], aes(x = value, y = cf_method, fill = method_main, alpha = alpha)) + # geom_vline(data = arimean3, aes(xintercept = V1), col = "red", lty = 2) + geom_boxplot() + theme_bw() + scale_colour_viridis_d() + scale_y_discrete(limits = rev) + labs(fill = "")+ guides(alpha = F)+ xlab("total score") + ylab("scenario") + coord_cartesian(xlim = c(0,1)) # check standard deviation #dt3.out[cat == 'total' & treatment =="baseline", sd(value), by = cf_method] ## ----plot scores with and without cat, fig.width = 7, fig.height = 5,fig.fullwidth = TRUE, fig.cap = 'Figure 5. Scores when categories are ignored during aggregation (S_Tnocat_OBI_A) and regular aggregation (S_T_OBI_A).'---- #dt <- copy(dta) #dt <- addcf(dt) #dt2 <- aggscores_brent(dt) dt2[, colv := "no category aggregation"] dt2[grepl("S_T_cf", cat), colv := "with category aggregation"] gg3 <- ggplot(dt2[cat %in% c('S_T_cf_log', #'S_T_cf_lin', 'S_T_cf_non', 'S_T_nocat') & cf_method == 'log' & !treatment %in% c('Recent years low', 'Recent years high')], aes(x = score, y = cat)) + geom_boxplot(aes(fill = colv)) + labs(fill="") + xlab("total score") + ylab("")+ theme_bw() + scale_y_discrete(limits = rev) + coord_cartesian(xlim = c(0,1)) + facet_wrap(~treatment, ncol = 1) #+ geom_boxplot(data = dt[cat == 'S_Tnocat_OBI_A'], mapping = aes(fill = 'blue')) gg3 ## ----eval=FALSE--------------------------------------------------------------- # obic_field(B_SOILTYPE_AGR = dt$B_SOILTYPE_AGR, B_GWL_CLASS = dt$B_GWL_CLASS, # B_SC_WENR = dt$B_SC_WENR, B_HELP_WENR = dt$B_HELP_WENR, B_AER_CBS = dt$B_AER_CBS, # B_LU_BRP = dt$B_LU_BRP, A_SOM_LOI = dt$A_SOM_LOI, A_SAND_MI = dt$A_SAND_MI, # A_SILT_MI = dt$A_SILT_MI, A_CLAY_MI = dt$A_CLAY_MI, A_PH_CC = dt$A_PH_CC, # A_N_RT = dt$A_N_RT, A_CN_FR = dt$A_CN_FR, # A_S_RT = dt$A_S_RT, A_N_PMN = dt$A_N_PMN, # A_P_AL = dt$A_P_AL, A_P_CC = dt$A_P_CC, A_P_WA = dt$A_P_WA, A_CEC_CO = dt$A_CEC_CO, # A_CA_CO_PO = dt$A_CA_CO_PO, A_MG_CO_PO = dt$A_MG_CO_PO, A_K_CO_PO = dt$A_K_CO_PO, # A_K_CC = dt$A_K_CC, A_MG_CC = dt$A_MG_CC, A_MN_CC = dt$A_MN_CC, # A_ZN_CC = dt$A_ZN_CC, A_CU_CC = dt$A_CU_CC, output = 'obic_score') ## ----get binnenveld indicator values, eval=TRUE------------------------------- # cleanup bini if required if(exists('bini')){rm(bini)} # select columns bcols <- names(binnenveld)[!grepl('BCS$', names(binnenveld))] # get indicator values per field, for first 10 fields for(i in unique(binnenveld$ID)[1:10]){ # calc OBIC inidicators for i bini.n <- obic_field_dt(binnenveld[ID == i,..bcols], output = 'unaggregated') # re add ID bini.n <- bini.n[,ID := i] if(!exists('bini')){ # if bini doesn't exist yet make it bini <- bini.n } else{ # if bini exists, add bini.n to bini (binnenveld indicators) bini <- rbindlist(list(bini, bini.n)) } } # remove inidicators not used for scoring bini <- bini[!cat %in% c('BCS', 'IBCS', 'IM')] # remove irrelevant columns rmcols <- names(bini)[!grepl('^weight|cf|w$', names(bini))] bini <- bini[,..rmcols] # rename ID to field setnames(bini, 'ID', 'field') # add treatment bini$treatment <- bini$field ## ----aggregate binneveld, eval=TRUE------------------------------------------- # add correction factors bini <- addcf(bini) # add scores bini <- aggscores(bini) ## ----plot binnenveld scores, eval=TRUE, message = FALSE, fig.height= 9, fig.width= 8 ,fig.cap="Figure 6. Total and category OBI scores of binnenveld fields aggregated with 'log', 'lin' or 'non' method, as well as total scores when disregarding categories in aggregating scores."---- # make labels ldt <- binnenveld[,.(ID, B_LU_BRP, B_SOILTYPE_AGR, B_GWL_CLASS)] ldt <- ldt[ID %in% unique(ID)[1:10]] # get most occurring soil type and crop type ldt <- ldt[, lapply(.SD, function (x) names(sort(table(x),decreasing = TRUE)[1])), .SDcols = c('B_LU_BRP','B_SOILTYPE_AGR', 'B_GWL_CLASS'),by = ID] ldt[, B_LU_BRP := as.integer(B_LU_BRP)] # add crop name ldt <- merge(ldt, crops.obic[,.(crop_code, crop_name)], by.x = 'B_LU_BRP', by.y = 'crop_code') # order ldt setorder(ldt, ID) # make cat labels more readable bini[grepl('^T', cat), lcat := "Total"] bini[grepl('^C', cat), lcat := "Chemical"] bini[grepl('^B', cat), lcat := "Biological"] bini[grepl('^P', cat), lcat := "Physical"] bini[grepl('^M', cat), lcat := "Management"] bini[grepl('^E', cat), lcat := "Environmental"] bini[, lcat := factor(lcat, levels = c('Chemical', 'Physical', 'Biological', 'Environmental', 'Management','Total'))] # make plot gg <- ggplot(bini, aes(x= lcat, y= value, color = cf_method)) + geom_point(size = 2,alpha = 0.5) + ylab('OBI-score') + xlab('') + theme_bw(12) + theme(panel.grid.major.x = element_blank(), panel.grid.major.y = element_line(size = 0.5), panel.grid.minor.x = element_blank()) + coord_cartesian(xlim = c(0,1))+ scale_x_discrete(limits = rev)+ scale_y_continuous(breaks = c(0, 0.25, 0.5, 0.75, 1))+ coord_flip() + facet_wrap(~field, ncol = 3) + theme(legend.position = c(0.8,0.1)) # show gg gg # show table with data on binnenveld fields knitr::kable(ldt[,.(ID, B_SOILTYPE_AGR, B_GWL_CLASS, crop_name)], caption = 'Soiltype, groundwaterclass and most frequent crop per binnenveld field.') ## ----graphical representation of EPX , fig.cap= "NAC scores where one variable varies and the others are set to Maximum ecological potential (MEP), variable values range from 5th percentile to 95th percentile reported by Rutgers (2008). Type indicates whether standard calculation (i) is used or not(j) where j improves on the NAC."---- # make table with reference data from Rutgers2008 ac.mep <- data.table(var = c('FMA', 'PotC', 'PotN', 'SOM', 'pH', 'PAL'), mep.v = c(2700, 18, 2.0, 2.2, 7.6, 47), soiltype = rep('klei', 6), landuse = rep('akkerbouw', 6), nl.mean = c(1150,22, 2, 2.5, 7.5, 47), nl5perc = c(14, 9, 0.5, 1.6, 7.3, 31), nl95perc = c(3960, 48, 3.7, 3.6, 7.7, 62)) # make data dtS <- data.table(x = seq(1.6,3.6,length.out = 100), var = 'SOM') dtF <- data.table(x=seq(10,3990,length.out = 100), var = 'FMA') dtF <- dtF[x>200] dtH <- data.table(x=seq(7.3,7.7,length.out = 100), var = 'pH') dtP <- data.table(x=seq(31,62,length.out = 100), var = 'PAL') dtC <- data.table(x=seq(9,48,length.out = 100), var = 'PotC') dtN <- data.table(x=seq(0.5,3.7,length.out = 100), var = 'PotN') # combine data dta <- rbindlist(list(dtS, dtF, dtH, dtP, dtC, dtN)) # add mep values dta <- merge(dta, ac.mep[,.(var, mep.v)], by = 'var') # indicate when to use i or j type dta[,type := 'i'] dta[(var == 'FMA' & xmep.v, type := 'j'] # calc y dta[,y := fifelse(type == 'i',10^-(abs(log10(x/mep.v)/6)), 10^(abs(log10(x/mep.v)/6)))] # plot ggplot(dta) + geom_line(aes(x = x, y = y, col = type))+ ylab("NAC score if all other variables are at MEP") + xlab("Variable value") + # labs(title = 'j (red) and i (black) type plot, j-type is applied') + facet_wrap(facets = ~var, scales = 'free_x', ncol = 3) + theme_bw() ## ----eval = FALSE, include=FALSE---------------------------------------------- # # e.g. (van Wijnen et. al. 2012 and Rutgers et. al. 2012) or Moebius-Clune 2016