mlr-org / mlr3benchmark

Analysis and tools for benchmarking in mlr3 and beyond.
https://mlr3benchmark.mlr-org.com/
GNU Lesser General Public License v3.0
12 stars 2 forks source link

Learner ranks are not correctly calculated? #30

Closed bblodfon closed 1 year ago

bblodfon commented 1 year ago

Description

Hi,

See the examples below for the issue. The data file is available here. Either there is something wrong with the calculation of the learners' ranks, or I have the wrong idea of what the 'best' learner is, because coxph shouldn't be the best overall:

Reproducible example

library(mlr3verse)
library(mlr3benchmark)
library(dplyr)

aggr_res = readRDS(file = 'issues/benchmark_aggr.rds')
aggr_res
#> # A tibble: 124 × 4
#>    task_id  learner_id  HarrellC_median UnoC_median
#>    <fct>    <fct>                 <dbl>       <dbl>
#>  1 Clinical coxnet                0.546       0.439
#>  2 Clinical rsf_logrank           0.527       0.449
#>  3 Clinical xgboost_cox           0.568       0.477
#>  4 mRNA     coxnet                0.5         0.582
#>  5 mRNA     rsf_logrank           0.527       0.586
#>  6 mRNA     xgboost_cox           0.488       0.549
#>  7 miRNA    coxnet                0.468       0.559
#>  8 miRNA    rsf_logrank           0.486       0.589
#>  9 miRNA    xgboost_cox           0.501       0.593
#> 10 CNA      coxnet                0.462       0.539
#> # … with 114 more rows

ba = BenchmarkAggr$new(aggr_res)

autoplot(ba, type = 'mean', meas = 'HarrellC_median')

# for C-index higher is better, so `minimize=FALSE`
autoplot(ba, type = 'cd', meas = 'HarrellC_median', minimize = FALSE, style = 2)

# ranks are wrongly calculated?
ranks = ba$rank_data(meas = 'HarrellC_median', minimize = FALSE)

# 1st example
ranks[,"Clinical-miRNA"]
#>      coxnet       coxph rsf_logrank xgboost_cox 
#>           2           3           4           1
aggr_res %>%
  filter(task_id == 'Clinical-miRNA') %>%
  arrange(desc(HarrellC_median)) # completely different
#> # A tibble: 4 × 4
#>   task_id        learner_id  HarrellC_median UnoC_median
#>   <fct>          <fct>                 <dbl>       <dbl>
#> 1 Clinical-miRNA coxph                 0.556       0.624
#> 2 Clinical-miRNA coxnet                0.547       0.625
#> 3 Clinical-miRNA rsf_logrank           0.542       0.607
#> 4 Clinical-miRNA xgboost_cox           0.513       0.637

# 2nd example
ranks[,"Clinical"]
#>      coxnet       coxph rsf_logrank xgboost_cox 
#>           3           4           1           2
aggr_res %>%
  filter(task_id == 'Clinical') %>%
  arrange(desc(HarrellC_median)) # completely different
#> # A tibble: 4 × 4
#>   task_id  learner_id  HarrellC_median UnoC_median
#>   <fct>    <fct>                 <dbl>       <dbl>
#> 1 Clinical xgboost_cox           0.568       0.477
#> 2 Clinical coxph                 0.561       0.459
#> 3 Clinical coxnet                0.546       0.439
#> 4 Clinical rsf_logrank           0.527       0.449

Created on 2022-12-12 with reprex v2.0.2

bblodfon commented 1 year ago

I've verified that if I calculate the rank matrix myself, I get correct results/plots, so there has to be something wrong with the implementation.

RaphaelS1 commented 1 year ago

Sorry, your rds isn't loading for me, so I can't reproduce this.

bblodfon commented 1 year ago

I shall send you a file then!

bblodfon commented 1 year ago

Here it is => https://github.com/bblodfon/mlr3tests/blob/main/issues/benchmark_aggr.tsv

RaphaelS1 commented 1 year ago

Thanks, fixed in https://github.com/mlr-org/mlr3benchmark/pull/31

The rank code was correct, but the wrong learner names were being added.

RaphaelS1 commented 1 year ago

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union
library(mlr3benchmark)
library(mlr3viz)
  aggr_res = read.table(file = "~/Desktop/test.tsv", sep = "\t", header = TRUE, colClasses = c("factor", "factor", "numeric", "numeric"))

  ba = BenchmarkAggr$new(aggr_res)
  ba$rank_data(meas = "HarrellC_median")
#>             Clinical Clinical-CNA Clinical-CNA-Methyl Clinical-Methyl
#> coxnet             2            2                   2               3
#> rsf_logrank        1            4                   3               2
#> xgboost_cox        4            1                   4               4
#> coxph              3            3                   1               1
#>             Clinical-miRNA Clinical-miRNA-CNA Clinical-miRNA-CNA-Methyl
#> coxnet                   3                  2                         2
#> rsf_logrank              2                  4                         3
#> xgboost_cox              1                  3                         4
#> coxph                    4                  1                         1
#>             Clinical-miRNA-Methyl Clinical-mRNA Clinical-mRNA-CNA
#> coxnet                          2             4                 3
#> rsf_logrank                     3             2                 4
#> xgboost_cox                     4             1                 2
#> coxph                           1             3                 1
#>             Clinical-mRNA-CNA-Methyl Clinical-mRNA-Methyl Clinical-mRNA-miRNA
#> coxnet                             2                    2                   3
#> rsf_logrank                        4                    4                   2
#> xgboost_cox                        3                    3                   1
#> coxph                              1                    1                   4
#>             Clinical-mRNA-miRNA-CNA Clinical-mRNA-miRNA-CNA-Methyl
#> coxnet                            2                              1
#> rsf_logrank                       4                              4
#> xgboost_cox                       3                              3
#> coxph                             1                              2
#>             Clinical-mRNA-miRNA-Methyl CNA CNA-Methyl Methyl miRNA miRNA-CNA
#> coxnet                               2   1          2      2     2         1
#> rsf_logrank                          4   3          4      3     3         3
#> xgboost_cox                          3   4          3      4     4         4
#> coxph                                1   2          1      1     1         2
#>             miRNA-CNA-Methyl miRNA-Methyl mRNA mRNA-CNA mRNA-CNA-Methyl
#> coxnet                     1            2    2        1               1
#> rsf_logrank                4            3    4        4               3
#> xgboost_cox                3            4    1        2               4
#> coxph                      2            1    3        3               2
#>             mRNA-Methyl mRNA-miRNA mRNA-miRNA-CNA mRNA-miRNA-CNA-Methyl
#> coxnet                2          1              1                     2
#> rsf_logrank           3          4              4                     4
#> xgboost_cox           4          2              3                     3
#> coxph                 1          3              2                     1
#>             mRNA-miRNA-Methyl
#> coxnet                      2
#> rsf_logrank                 3
#> xgboost_cox                 4
#> coxph                       1

  ba$rank_data(meas = "HarrellC_median", task = "Clinical")
#>             Clinical
#> coxnet             2
#> rsf_logrank        1
#> xgboost_cox        4
#> coxph              3

  ba$data %>%
    filter(task_id == "Clinical") %>%
    arrange(HarrellC_median)
#>     task_id  learner_id HarrellC_median UnoC_median
#> 1: Clinical rsf_logrank       0.5268655   0.4493737
#> 2: Clinical      coxnet       0.5460005   0.4391885
#> 3: Clinical       coxph       0.5614611   0.4586783
#> 4: Clinical xgboost_cox       0.5681803   0.4771556

  autoplot(ba, type = 'mean', meas = 'HarrellC_median')

  autoplot(ba, type = 'cd', meas = 'HarrellC_median', minimize = FALSE, style = 2)

Created on 2022-12-16 with reprex v2.0.2

bblodfon commented 1 year ago

Yep, that's it!