## ----setup, include=FALSE----------------------------------------------------- knitr::opts_chunk$set(echo = TRUE,message=FALSE ,warning=FALSE, tidy.opts=list(width.cutoff=60),tidy=TRUE) ## ----------------------------------------------------------------------------- library(sgs) groups = c(rep(1:20, each=3), rep(21:40, each=4), rep(41:60, each=5), rep(61:80, each=6), rep(81:100, each=7)) data = gen_toy_data(p=500, n=400, groups = groups, seed_id=3) ## ----------------------------------------------------------------------------- model = fit_sgs(X = data$X, y = data$y, groups = groups, type="linear", lambda = 0.5, alpha=0.95, vFDR=0.1, gFDR=0.1, standardise = "l2", intercept = TRUE, verbose=FALSE, screen=TRUE) ## ----------------------------------------------------------------------------- model_path = fit_sgs(X = data$X, y = data$y, groups = groups, type="linear", lambda = "path", path_length = 5, min_frac = 0.1, alpha=0.95, vFDR=0.1, gFDR=0.1, standardise = "l2", intercept = TRUE, verbose=FALSE, screen=TRUE) ## ----------------------------------------------------------------------------- model$beta[model$selected_var+1] # the +1 is to account for the intercept model$group_effects[model$selected_grp] model$selected_var model$selected_grp ## ----------------------------------------------------------------------------- fdr_sensitivity = function(fitted_ids, true_ids,num_coef){ # calculates FDR, FPR, and sensitivity num_true = length(intersect(fitted_ids,true_ids)) num_false = length(fitted_ids) - num_true num_missed = length(true_ids) - num_true num_true_negatives = num_coef - length(true_ids) out=c() out$fdr = num_false / (num_true + num_false) if (is.nan(out$fdr)){out$fdr = 0} out$sensitivity = num_true / length(true_ids) if (length(true_ids) == 0){ out$sensitivity = 1 } out$fpr = num_false / num_true_negatives out$f1 = (2*num_true)/(2*num_true + num_false + num_missed) if (is.nan(out$f1)){out$f1 = 1} return(out) } ## ----------------------------------------------------------------------------- fdr_sensitivity(fitted_ids = model$selected_var, true_ids = data$true_var_id, num_coef = 500) fdr_sensitivity(fitted_ids = model$selected_grp, true_ids = data$true_grp_id, num_coef = 100) ## ----------------------------------------------------------------------------- cv_model = fit_sgs_cv(X = data$X, y = data$y, groups=groups, type = "linear", path_length = 20, nfolds=10, alpha = 0.95, vFDR = 0.1, gFDR = 0.1, min_frac = 0.05, standardise="l2",intercept=TRUE,verbose=TRUE, screen = TRUE) ## ----------------------------------------------------------------------------- print(cv_model) ## ----------------------------------------------------------------------------- cv_model$best_lambda_id ## ----------------------------------------------------------------------------- fdr_sensitivity(fitted_ids = cv_model$fit$selected_var, true_ids = data$true_var_id, num_coef = 500) fdr_sensitivity(fitted_ids = cv_model$fit$selected_grp, true_ids = data$true_grp_id, num_coef = 100) ## ----------------------------------------------------------------------------- plot(cv_model,how_many = 10) ## ----------------------------------------------------------------------------- predict(model,data$X)[1:5] dim(predict(cv_model,data$X)) ## ----------------------------------------------------------------------------- sigmoid = function(x) { 1 / (1 + exp(-x)) } y = ifelse(sigmoid(data$X %*% data$true_beta + rnorm(400))>0.5,1,0) train_y = y[1:350] test_y = y[351:400] train_X = data$X[1:350,] test_X = data$X[351:400,] ## ----------------------------------------------------------------------------- cv_model = fit_sgs_cv(X = train_X, y = train_y, groups=groups, type = "logistic", path_length = 20, nfolds=10, alpha = 0.95, vFDR = 0.1, gFDR = 0.1, min_frac = 0.05, standardise="l2",intercept=FALSE,verbose=TRUE, screen = TRUE) ## ----------------------------------------------------------------------------- predictions = predict(cv_model,test_X) ## ----------------------------------------------------------------------------- predictions$response[1:5,cv_model$best_lambda_id] predictions$class[1:5,cv_model$best_lambda_id] sum(predictions$class[,cv_model$best_lambda_id] == test_y)/length(test_y) ## ----------------------------------------------------------------------------- groups = c(rep(1:20, each=3), rep(21:40, each=4), rep(41:60, each=5), rep(61:80, each=6), rep(81:100, each=7)) data = gen_toy_data(p=500, n=400, groups = groups, seed_id=3) ## ----------------------------------------------------------------------------- model = fit_gslope(X = data$X, y = data$y, groups = groups, type="linear", lambda = 0.5, gFDR=0.1, standardise = "l2", intercept = TRUE, verbose=FALSE, screen=TRUE) ## ----------------------------------------------------------------------------- screen_time = system.time(model_screen <- fit_sgs(X = data$X, y = data$y, groups = groups, type="linear", path_length = 100, alpha=0.95, vFDR=0.1, gFDR=0.1, standardise = "l2", intercept = TRUE, verbose=FALSE, screen=TRUE)) no_screen_time = system.time(model_no_screen <- fit_sgs(X = data$X, y = data$y, groups = groups, type="linear", path_length = 100, alpha=0.95, vFDR=0.1, gFDR=0.1, standardise = "l2", intercept = TRUE, verbose=FALSE, screen=FALSE)) screen_time no_screen_time ## ----------------------------------------------------------------------------- screen_time = system.time(model_screen <- fit_gslope(X = data$X, y = data$y, groups = groups, type="linear", path_length = 100, gFDR=0.1, standardise = "l2", intercept = TRUE, verbose=FALSE, screen=TRUE)) no_screen_time = system.time(model_no_screen <- fit_gslope(X = data$X, y = data$y, groups = groups, type="linear", path_length = 100, gFDR=0.1, standardise = "l2", intercept = TRUE, verbose=FALSE, screen=FALSE)) screen_time no_screen_time