Bioconductor / AnVIL

Interact with AnVIL and Leonardo projects
https://bioconductor.org/packages/AnVIL
8 stars 10 forks source link

Can't get the output files using `avworkflow_files` #69

Closed shbrief closed 2 years ago

shbrief commented 2 years ago

Hi! It seems like I can't get the output files using `avworkflow_files`. Below, I tried to extract the output file locations from both an aborted and a successfully completed workflow, first using the Terra API directly and then using the `avworkflow_files` function.

Collect input parameters:

> library(AnVIL)
> 
> ## Get the namespaces
> ws_fullname <- .get_workspace_fullname(workspaceName = "salmon_test")
> ws_namespace <- unlist(strsplit(ws_fullname, "/"))[1] # "waldronlab-terra-rstudio"
> ws_name <- unlist(strsplit(ws_fullname, "/"))[2] # "salmon_test"
> avworkspace(ws_fullname)
[1] "waldronlab-terra-rstudio/salmon_test"
> 
> ## List of all the submissions
> submissions <- monitorWorkflow(workspaceName = ws_fullname)
> submissions
# A tibble: 4 × 6
  submissionId                         submitter         submissionDate      status  succeeded failed
  <chr>                                <chr>             <dttm>              <chr>       <int>  <int>
1 78584fc8-8647-4ef0-bd79-426de940c52b shbrief@gmail.com 2022-10-12 16:09:08 Aborted         0      0
2 73f16bcd-eaf9-4c2d-99ce-c455a1044025 shbrief@gmail.com 2022-10-07 14:56:24 Done            1      0
3 452e1a14-0fa6-48dd-a04b-19770e0e8694 shbrief@gmail.com 2022-10-06 10:49:56 Aborted         0      0
4 3df266ca-8307-4545-9b5b-c496408b0367 shbrief@gmail.com 2022-10-05 10:42:21 Aborted         0      0

Get outputs:

> ## Get output using Terra API
> terra <- Terra()
> res_aborted <- terra$workflowOutputsInSubmission(
+     workspaceNamespace = ws_namespace,
+     workspaceName = ws_name,
+     submissionId = submissions$submissionId[1], # aborted
+     workflowId = "c2ce4342-c1bc-4604-9b4b-801426d1394e"
+ )
> dat_aborted <- fromJSON(rawToChar(res_aborted$content))
> 
> res_done <- terra$workflowOutputsInSubmission(
+     workspaceNamespace = ws_namespace,
+     workspaceName = ws_name,
+     submissionId = submissions$submissionId[2], # successfully done
+     workflowId = "7a6d7f6c-a79d-4690-95cc-e5b0a866266e"
+ )
> dat_done <- fromJSON(rawToChar(res_done$content))
> 
> names(dat_aborted$tasks)
[1] "salmon.salmon_index"
> names(dat_done$tasks)
[1] "salmon.salmon_index" "salmon.salmon_quant"
> dat_done$tasks$salmon.salmon_quant$outputs
$salmon.salmon_quant.quant_output
 [1] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-0/DRR016125_1.tar.gz" 
 [2] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-1/DRR016126_1.tar.gz" 
 [3] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-2/DRR016127_1.tar.gz" 
 [4] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-3/DRR016128_1.tar.gz" 
 [5] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-4/DRR016129_1.tar.gz" 
 [6] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-5/DRR016130_1.tar.gz" 
 [7] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-6/DRR016131_1.tar.gz" 
 [8] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-7/DRR016132_1.tar.gz" 
 [9] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-8/DRR016133_1.tar.gz" 
[10] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-9/DRR016134_1.tar.gz" 
[11] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-10/DRR016135_1.tar.gz"
[12] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-11/DRR016136_1.tar.gz"
[13] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-12/DRR016137_1.tar.gz"
[14] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-13/DRR016138_1.tar.gz"
[15] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-14/DRR016139_1.tar.gz"
[16] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-15/DRR016140_1.tar.gz"

> 
> ## Get output using `avworkflow_files`
> av_res_aborted <- avworkflow_files(submissionId = submissions$submissionId[1],
+                                    bucket = avbucket(namespace = ws_namespace,
+                                                      name = ws_name))
> av_res_done <- avworkflow_files(submissionId = submissions$submissionId[2],
+                                 bucket = avbucket(namespace = ws_namespace,
+                                                   name = ws_name))
> 
> av_res_aborted
# A tibble: 0 × 5
# … with 5 variables: file <chr>, workflow <chr>, task <chr>, type <chr>, path <chr>
# ℹ Use `colnames()` to see all variable names
> av_res_done
# A tibble: 0 × 5
# … with 5 variables: file <chr>, workflow <chr>, task <chr>, type <chr>, path <chr>
# ℹ Use `colnames()` to see all variable names
mtmorgan commented 2 years ago

I think files in the bucket previously started with <submissionId>/... but now start with submissions/<submissionId>. I'll look into updating the code or using a different endpoint...

mtmorgan commented 2 years ago

BiocManager::install("Bioconductor/AnVIL", ref = "issue-69") should behave better... let me know if there are issues when you have a chance.

shbrief commented 2 years ago

It's giving an error on my end.

> ## Input arguments
> submissionId <- "73f16bcd-eaf9-4c2d-99ce-c455a1044025"
> ws_namespace <- "waldronlab-terra-rstudio"
> ws_name <- "salmon_test"
> 
> ## avworkflow_files
> av_res_done <- avworkflow_files(submissionId = submissionId,
+                                 bucket = avbucket(namespace = ws_namespace, 
+                                                   name = ws_name))
Error in avworkflow_files(submissionId = submissionId, bucket = avbucket(namespace = ws_namespace,  : 
  .is_scalar_character(name) is not TRUE
In addition: Warning message:
'bucket' is deprecated; it is ignored in the current implementation and will be removed in a subsequent release 
> 
> 
> ## Terra API directly
> terra <- Terra()
> res_done <- terra$workflowOutputsInSubmission(
+     workspaceNamespace = ws_namespace,
+     workspaceName = ws_name,
+     submissionId = "73f16bcd-eaf9-4c2d-99ce-c455a1044025",
+     workflowId = "7a6d7f6c-a79d-4690-95cc-e5b0a866266e")
> dat_done <- jsonlite::fromJSON(rawToChar(res_done$content))
> dat_done$tasks$salmon.salmon_quant$outputs
$salmon.salmon_quant.quant_output
 [1] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-0/DRR016125_1.tar.gz" 
 [2] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-1/DRR016126_1.tar.gz" 
 [3] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-2/DRR016127_1.tar.gz" 
 [4] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-3/DRR016128_1.tar.gz" 
 [5] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-4/DRR016129_1.tar.gz" 
 [6] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-5/DRR016130_1.tar.gz" 
 [7] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-6/DRR016131_1.tar.gz" 
 [8] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-7/DRR016132_1.tar.gz" 
 [9] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-8/DRR016133_1.tar.gz" 
[10] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-9/DRR016134_1.tar.gz" 
[11] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-10/DRR016135_1.tar.gz"
[12] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-11/DRR016136_1.tar.gz"
[13] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-12/DRR016137_1.tar.gz"
[14] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-13/DRR016138_1.tar.gz"
[15] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-14/DRR016139_1.tar.gz"
[16] "gs://fc-a771349a-c846-4736-ab87-9e109b29bfce/submissions/73f16bcd-eaf9-4c2d-99ce-c455a1044025/salmon/7a6d7f6c-a79d-4690-95cc-e5b0a866266e/call-salmon_quant/shard-15/DRR016140_1.tar.gz"

> 
> ## Session Info
> sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: aarch64-apple-darwin20 (64-bit)
Running under: macOS Monterey 12.2.1

Matrix products: default
LAPACK: /Library/Frameworks/R.framework/Versions/4.1-arm64/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] AnVIL_1.9.13.2        AnVILWorkflow_0.99.18 dplyr_1.0.10         

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.9           pillar_1.8.1         compiler_4.1.2       formatR_1.12         later_1.3.0         
 [6] futile.logger_1.4.3  futile.options_1.0.1 tools_4.1.2          digest_0.6.30        evaluate_0.17       
[11] jsonlite_1.8.3       lifecycle_1.0.3      tibble_3.1.8         pkgconfig_2.0.3      rlang_1.0.6         
[16] shiny_1.7.3          cli_3.4.1            DBI_1.1.3            rstudioapi_0.14      curl_4.3.3          
[21] yaml_2.3.6           parallel_4.1.2       xfun_0.34            fastmap_1.1.0        knitr_1.40          
[26] httr_1.4.4           generics_0.1.3       vctrs_0.5.0          htmlwidgets_1.5.4    DT_0.26             
[31] tidyselect_1.2.0     glue_1.6.2           R6_2.5.1             fansi_1.0.3          rmarkdown_2.17      
[36] purrr_0.3.5          tidyr_1.2.1          lambda.r_1.2.4       magrittr_2.0.3       ellipsis_0.3.2      
[41] promises_1.2.0.1     htmltools_0.5.3      rapiclient_0.1.3     assertthat_0.2.1     mime_0.12           
[46] xtable_1.8-4         httpuv_1.6.6         utf8_1.2.2           miniUI_0.1.1.1      
mtmorgan commented 2 years ago

I changed the arguments to the function as suggested by

In addition: Warning message:
'bucket' is deprecated; it is ignored in the current implementation and will be removed in a subsequent release 

I updated this warning to

In addition: Warning messages:
1: 'bucket=' is deprecated; it is ignored in the current implementation
and will be removed in a subsequent release; provide workspace
'namespace=' and 'name=' arguments to 'avworkflow_files()' directly 

so invoking as

av_res_done <- avworkflow_files(
    submissionId = submissionId, namespace = ws_namespace,  name = ws_name
)

is the intended use.

FWIW setting

avworkspace(paste(ws_namespace, ws_name, sep="/"))
## or avworkspace("waldronlab-terra-rstudio/salmon_test")
## or avworkspace_namespace(ws_namespace); avworkspace_name(ws_name)

sets those fields 'globally' so you can then just invoke

av_res_done <- avworkflow_files(submissionId = submissionId)
shbrief commented 2 years ago

Oh... sorry about that. Should have read the message more carefully. ;( It works well now! Thanks!!

mtmorgan commented 2 years ago

Great! I will wait until the end of next week to merge these to the new release and devel branches.