gboris / blkbox

Data exploration with multiple machine learning algorithms
14 stars 4 forks source link

cool plot with CV results - might add later #49

Open zacdav opened 8 years ago

zacdav commented 8 years ago


# Derive the performance object from the cross-validation results.
cv_performance <- Performance(cv_results)

# Show the stored performance values as a single flat vector.
unlist(cv_performance$Performance)

# ROC curve for the cross-validated models.
blkboxROC(cv_performance)

# Bar plot of AUROC; pass a different `metric` to plot another measure.
cv_plot <- cv.plot(cv_results, metric = "AUROC", type = "barplot")

# Assemble a feature-by-algorithm table of importance values.
# Column 11 of each algorithm's importance table is taken (presumably the
# average across folds - TODO confirm against the blkbox output layout).
importance_rows <- lapply(
  cv_results$algorithm.importance,
  function(x) data.frame(t(data.frame(x[, 11])))
)

# Stack one row per algorithm (rbind.fill pads features an algorithm did not
# report with NA) and label each row with the algorithm's name.
importance_wide <- cbind(
  algorithm = names(cv_results$algorithm.importance),
  Reduce(rbind.fill, importance_rows)
)

# Transpose so that rows are features; the first row holds the algorithm
# names. Mixing labels with numbers means t() yields text cells here.
cv_avg_imp <- data.frame(t(importance_wide))

# Keep the row names: the first is the "algorithm" label row, the rest are
# the feature names, which are re-attached as a proper column below.
rnames <- rownames(cv_avg_imp)

# The first row holds the algorithm labels. Converting cell by cell works
# whether the columns arrived as character or as factor.
algorithm_names <- vapply(
  cv_avg_imp[1, , drop = FALSE],
  as.character,
  character(1)
)

# Drop the label row so only importance values remain.
cv_avg_imp <- cv_avg_imp[-1, , drop = FALSE]

# The importance metrics are not uniform across algorithms, so each algorithm
# column is scaled relative to its own maximum (the top feature becomes 1.0).
# na.rm = TRUE is essential: features an algorithm did not report are NA, and
# a plain max() would return NA and wipe out the entire column.
cv_avg_imp[] <- lapply(cv_avg_imp, function(column) {
  values <- as.numeric(as.character(column))
  values / max(values, na.rm = TRUE)
})

colnames(cv_avg_imp) <- unname(algorithm_names)
rownames(cv_avg_imp) <- NULL
cv_avg_imp <- cbind(feature = rnames[-1], cv_avg_imp)

# Reshape wide -> long: one row per (feature, algorithm) pair. Every column
# other than `feature` is an algorithm column.
cv_avg_imp <- gather(cv_avg_imp, algorithm, importance, -feature)

# One panel per feature, one bar per algorithm, algorithm names rotated so
# they stay readable on the x axis.
ggplot(cv_avg_imp, aes(x = algorithm, y = importance, fill = algorithm)) +
  geom_bar(stat = "identity") +
  facet_wrap(~feature) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 90))