darwin-eu / CDMConnector

A pipe friendly way to interact with an OMOP Common Data Model
https://darwin-eu.github.io/CDMConnector/
Apache License 2.0
12 stars 9 forks source link

Redshift connection not supported #7

Closed lluo0 closed 11 months ago

lluo0 commented 11 months ago

Hi! I was trying to create a cdm connection using this package for IncidencePrevalence. However, I got an error saying that it doesn't support Redshift connection. Error:

Error in `dbms()`:
! Redshift is not a supported connection type.
Run `rlang::last_error()` to see where the error occurred.
> rlang::last_error()
<error/rlang_error>
Error in `dbms()`:
! Redshift is not a supported connection type.
---
Backtrace:
 1. CDMConnector::cdm_from_con(...)
 4. CDMConnector:::dbms.DBIConnection(con)
Run `rlang::last_trace()` to see the full context.
> rlang::last_trace()
<error/rlang_error>
Error in `dbms()`:
! Redshift is not a supported connection type.
---
Backtrace:
    ▆
 1. └─CDMConnector::cdm_from_con(...)
 2.   ├─dbms(con) %in% c("duckdb", "sqlite")
 3.   ├─CDMConnector::dbms(con)
 4.   └─CDMConnector:::dbms.DBIConnection(con)
 5.     └─rlang::abort(glue::glue("{class(con)} is not a supported connection type."))

I was wondering if you could include redshift connection as well? Thanks so much!

edward-burn commented 11 months ago

Hi @lluo0 redshift is supported, please see here for examples of how to connect https://darwin-eu.github.io/CDMConnector/articles/a04_DBI_connection_examples.html (note for redshift you should use RPostgres::Redshift())

As an aside, if you´re about to use IncidencePrevalence please use the latest version from github https://github.com/darwin-eu/IncidencePrevalence, as today we actually updated it so it will work for redshift with the latest versions of dplyr and dbplyr (https://github.com/darwin-eu/IncidencePrevalence/issues/2)

ablack3 commented 11 months ago

Hi @lluo0,

Try installing the development version of CDMConnector by running remotes::install_github("OdyOSG/CDMConnector")

This will be released on CRAN in the next few weeks.

lluo0 commented 11 months ago

Thank you! I had to downgrade dplyr for it to work but just now was also able to connect using redshift after upgrading the CDMConnector on OdyOSG.

One other question that I couldn't find an answer for: So I created a cdm by specifying both my cdm schema and my writable schema (where I saved my cohorts to) by

cdm <- CDMConnector::cdm_from_con(con, 
                                  cdm_schema = cdm_schema, 
                                  write_schema = write_schema',
                                  cdm_name = "iqvia_imspm")

However, cdm$ only gives you tables in the cdm schema - could you inform me how I can access tables in my writable/result schema? The tutorials seem to just use the cdm schema to create/edit new cohorts.

ablack3 commented 11 months ago

Maybe this is what you're looking for?

library(CDMConnector)
library(dplyr, warn.conflicts = F)

con <- DBI::dbConnect(RPostgres::Redshift(),
                      dbname   = Sys.getenv("CDM5_REDSHIFT_DBNAME"),
                      host     = Sys.getenv("CDM5_REDSHIFT_HOST"),
                      port     = Sys.getenv("CDM5_REDSHIFT_PORT"),
                      user     = Sys.getenv("CDM5_REDSHIFT_USER"),
                      password = Sys.getenv("CDM5_REDSHIFT_PASSWORD"))

write_schema <- Sys.getenv("CDM5_REDSHIFT_SCRATCH_SCHEMA")
cdm_schema <- Sys.getenv("CDM5_REDSHIFT_CDM_SCHEMA")

# you can add existing cohort tables in the write_schema to a cdm object like so.
cdm <- cdm_from_con(con, cdm_schema, write_schema, cohort_tables = c("cohort"))

cdm$cohort
#> # Source:   table<"public"."cohort"> [?? x 4]
#> # Database: postgres  [ohdsi@rssqltest.cxmbbsphpllo.us-east-1.redshift.amazonaws.com:5439/test]
#>    cohort_definition_id subject_id cohort_start_date cohort_end_date
#>                 <int64>    <int64> <date>            <date>         
#>  1                    1          6 2009-12-21        2010-09-26     
#>  2                    1         11 2008-06-17        2010-12-19     
#>  3                    1         12 2009-04-20        2010-11-23     
#>  4                    1         13 2008-03-05        2010-09-22     
#>  5                    1         20 2010-02-05        2010-09-13     
#>  6                    1         30 2008-07-13        2010-12-29     
#>  7                    1         31 2008-06-17        2010-12-14     
#>  8                    1         34 2008-04-17        2010-07-26     
#>  9                    1         41 2008-03-19        2010-08-28     
#> 10                    1         51 2008-04-05        2010-03-31     
#> # ℹ more rows

list_tables(con, write_schema)
#>  [1] "date_df"                           "temptable1_7476648"               
#>  [3] "cohort"                            "test_exp_count"                   
#>  [5] "test_f_med"                        "test_f_med_count"                 
#>  [7] "test_f_cond_count"                 "test_o_count"                     
#>  [9] "_incidence_working_5"              "test_exp_set"                     
#> [11] "test_f_med_set"                    "test_f_cond_set"                  
#> [13] "test_exp"                          "test_o_set"                       
#> [15] "test_f_cond"                       "test_o"                           
#> [17] "outcome_count"                     "outcome_set"                      
#> [19] "outcome"                           "death_cohort"                     
#> [21] "death_cohort_count"                "drug_strength"                    
#> [23] "visit_occurrence"                  "concept_ancestor"                 
#> [25] "condition_occurrence"              "observation_period"               
#> [27] "person"                            "drug_exposure"                    
#> [29] "test_dusconcept"                   "test_dusconcept_ancestor"         
#> [31] "test_dusdrug_strength"             "test_dusperson"                   
#> [33] "test_dusobservation_period"        "test_dusdrug_exposure"            
#> [35] "test_duscondition_occurrence"      "test_dusvisit_occurrence"         
#> [37] "test_dusobservation"               "test_duscohort1"                  
#> [39] "test_duscohort1_set"               "test_duscohort1_attrition"        
#> [41] "test_duscohort1_count"             "test_duscohort2"                  
#> [43] "test_duscohort2_set"               "test_duscohort2_attrition"        
#> [45] "test_duscohort2_count"             "public.temp_test"                 
#> [47] "tbl1689168563_person"              "tbl1689168563_vocabulary"         
#> [49] "tbl1689169346_person"              "tbl1689169346_vocabulary"         
#> [51] "tbl1689169381_person"              "tbl1689169381_vocabulary"         
#> [53] "test_cohort_inclusion"             "test_cohort_censor_stats"         
#> [55] "test_cohort_inclusion_result"      "test_cohort_inclusion_stats"      
#> [57] "test_cohort_summary_stats"         "test1690062908_person"            
#> [59] "test1690062908_observation_period" "test1690063128_person"            
#> [61] "test1690063128_observation_period" "test1690063445_person"            
#> [63] "test1690063445_observation_period" "test1690063532_person"            
#> [65] "test1690063532_observation_period" "test1690063571_person"            
#> [67] "test1690063571_observation_period" "test1690108723_person"            
#> [69] "test1690108723_observation_period" "test1690110997_person"            
#> [71] "test1690110997_observation_period" "test1690111043_person"            
#> [73] "test1690111043_observation_period" "test1690214626_person"            
#> [75] "test1690214626_observation_period" "wfzewaoqbjms_test_table"          
#> [77] "new_cohort"                        "new_cohort_set"                   
#> [79] "new_cohort_count"                  "new_cohort_attrition"             
#> [81] "tmpdate"                           "chrt0"                            
#> [83] "test_cohort_count"                 "test_cohort_set"                  
#> [85] "test_cohort"                       "cohort_set"                       
#> [87] "cohort_count"                      "cohort_attrition"

# to create references to other standalone tables do this
a_new_table_reference <- tbl(con, dbplyr::in_schema(write_schema, "date_df" ))

a_new_table_reference
#> # Source:   SQL [1 x 2]
#> # Database: postgres  [ohdsi@rssqltest.cxmbbsphpllo.us-east-1.redshift.amazonaws.com:5439/test]
#>   date_1     date_2    
#>   <date>     <date>    
#> 1 2022-11-01 2023-11-01

a_new_table_reference %>% 
  count()
#> # Source:   SQL [1 x 1]
#> # Database: postgres  [ohdsi@rssqltest.cxmbbsphpllo.us-east-1.redshift.amazonaws.com:5439/test]
#>         n
#>   <int64>
#> 1       1

DBI::dbDisconnect(con)

Created on 2023-07-26 with reprex v2.0.2

lluo0 commented 11 months ago

Thanks so much! That's super helpful :)