Open shntnu opened 3 years ago
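Notebook to produce `JUMP-Target_compound_metadata_all_targets.csv` and `JUMP-Target_compounds_crispr_orf_connections.csv`: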
library(tidyverse)
# Compound metadata for the JUMP-Target plate, read from a local TSV.
dfmc <- read_tsv("JUMP-Target-add_files/JUMP-Target_compound_metadata.tsv")

# Compound-to-target annotations, fetched from GitHub.
#
# The access token that used to be hard-coded in this URL has been removed:
# a `?token=` value is a credential and must not be committed or pasted into
# an issue. If the repository requires authentication, export the token as
# the GITHUB_RAW_TOKEN environment variable before running; it is appended to
# the URL only when set.
drug_target_samples_url <- paste0(
  "https://raw.githubusercontent.com/jump-cellpainting/jump-cellpainting/",
  "master/0.design-pilots/output/drug_target_samples.csv"
)
github_raw_token <- Sys.getenv("GITHUB_RAW_TOKEN")
if (nzchar(github_raw_token)) {
  drug_target_samples_url <-
    paste0(drug_target_samples_url, "?token=", github_raw_token)
}
drug_target_samples <- read_csv(drug_target_samples_url)
# Unique compound names found in the metadata. A single `pert_iname` entry
# can hold several names separated by "|"; each name gets its own row and is
# stripped of surrounding whitespace before de-duplication.
dfmc_pert_iname <- dfmc %>%
  select(pert_iname) %>%
  mutate(pert_iname = str_split(pert_iname, "\\|")) %>%
  unnest(pert_iname) %>%
  distinct(pert_iname = str_trim(pert_iname))
# Number of unique compound names.
dfmc_pert_iname %>%
  tally() %>%
  knitr::kable()
n |
---|
304 |
# Number of compound names that have at least one (pert_iname, target) row in
# `drug_target_samples`. The join key is left implicit (the only shared column
# is `pert_iname`).
dfmc_pert_iname %>%
  inner_join(distinct(drug_target_samples, pert_iname, target)) %>%
  summarise(n = n_distinct(pert_iname)) %>%
  knitr::kable()
## Joining, by = "pert_iname"
n |
---|
304 |
# Attach every annotated target to each (pert_iname, broad_sample) pair.
# `pert_iname` is split on "|" and trimmed exactly as for `dfmc_pert_iname`,
# so that the names line up with `drug_target_samples`.
#
# NOTE(review): the inner join through `dfmc_pert_iname` appears redundant,
# because the left-hand names are produced by the same split/trim of the same
# column. It is kept here so the result and the emitted join messages are
# unchanged.
dfmc_full_targets <-
  dfmc %>%
  mutate(pert_iname = str_split(pert_iname, "\\|")) %>%
  unnest(pert_iname) %>%
  mutate(pert_iname = str_trim(pert_iname)) %>%
  distinct(pert_iname, broad_sample) %>%
  inner_join(
    inner_join(
      dfmc_pert_iname,
      distinct(drug_target_samples, pert_iname, target)
    )
  )
## Joining, by = "pert_iname"
## Joining, by = "pert_iname"
# Append the metadata's own (broad_sample, pert_iname, target) rows, with
# `pert_iname` left unsplit, below the joined annotations, then drop
# duplicate rows.
dfmc_full_targets <-
  dfmc %>%
  distinct(broad_sample, pert_iname, target) %>%
  bind_rows(dfmc_full_targets, .) %>%
  distinct(broad_sample, pert_iname, target)
# Number of distinct `pert_iname` values after adding the metadata rows.
dfmc_full_targets %>%
  summarise(n = n_distinct(pert_iname)) %>%
  knitr::kable()
n |
---|
305 |
# Number of distinct `pert_id` values, where `pert_id` is the first 13
# characters of `broad_sample`.
dfmc_full_targets %>%
  summarise(n = n_distinct(str_sub(broad_sample, 1, 13))) %>%
  knitr::kable()
n |
---|
306 |
# Persist the expanded compound-target table.
write_csv(dfmc_full_targets, "JUMP-Target_compound_metadata_all_targets.csv")
# Compound metadata.
# NOTE(review): this is the same file already read into `dfmc` at the top;
# the re-read presumably lets this section run on its own.
dfmc <- read_tsv(
  file = "JUMP-Target-add_files/JUMP-Target_compound_metadata.tsv"
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## broad_sample = col_character(),
## InChIKey = col_character(),
## pert_iname = col_character(),
## pubchem_cid = col_double(),
## target = col_character(),
## pert_type = col_character(),
## control_type = col_character(),
## smiles = col_character()
## )
# ORF metadata (columns: broad_sample, genes, pert_type, control_type).
dfmo <- read_tsv(
  file = "JUMP-Target-add_files/JUMP-Target_orf_metadata.tsv"
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## broad_sample = col_character(),
## genes = col_character(),
## pert_type = col_character(),
## control_type = col_character()
## )
# CRISPR metadata (columns: broad_sample, genes, pert_type, control_type,
# target_sequence).
dfmx <- read_tsv(
  file = "JUMP-Target-add_files/JUMP-Target_crispr_metadata.tsv"
)
##
## ── Column specification ────────────────────────────────────────────────────────
## cols(
## broad_sample = col_character(),
## genes = col_character(),
## pert_type = col_character(),
## control_type = col_character(),
## target_sequence = col_character()
## )
# One row per distinct (sample, gene) for each perturbation type. The sample
# column is renamed per type and the target/genes column is unified as `gene`
# so the three tables can be joined on it.
dfmc1 <- dfmc_full_targets %>%
  select(broad_sample_compound = broad_sample, pert_iname, gene = target) %>%
  distinct()
dfmo1 <- dfmo %>%
  select(broad_sample_orf = broad_sample, gene = genes) %>%
  distinct()
# Only the CRISPR table has rows containing NA removed.
dfmx1 <- dfmx %>%
  select(broad_sample_crispr = broad_sample, gene = genes) %>%
  distinct() %>%
  na.omit()

# Unique non-missing gene symbols per perturbation type, in order of first
# appearance.
dfcg1 <- unique(dfmc1$gene[!is.na(dfmc1$gene)])
dfog <- unique(dfmo1$gene[!is.na(dfmo1$gene)])
dfxg <- unique(dfmx1$gene[!is.na(dfmx1$gene)])
# Every compound / ORF / CRISPR combination that shares a gene. Both joins
# use the implicit key (the only shared column is `gene`).
connections <-
  inner_join(dfmc1, dfmo1) %>%
  inner_join(dfmx1) %>%
  select(
    gene,
    broad_sample_compound,
    pert_iname,
    broad_sample_orf,
    broad_sample_crispr
  )
## Joining, by = "gene"
## Joining, by = "gene"
# Number of genes covered by all three perturbation types.
connections %>%
  summarise(n = n_distinct(gene)) %>%
  knitr::kable()
n |
---|
160 |
# Number of distinct compound-ORF pairs.
connections %>%
  summarise(n = n_distinct(broad_sample_compound, broad_sample_orf)) %>%
  knitr::kable()
n |
---|
466 |
# Number of distinct compound-CRISPR pairs.
connections %>%
  summarise(n = n_distinct(broad_sample_compound, broad_sample_crispr)) %>%
  knitr::kable()
n |
---|
893 |
# Number of distinct compound-ORF pairs.
# NOTE(review): this repeats the compound-ORF count already computed earlier
# in this notebook; possibly a copy-paste leftover.
connections %>%
  summarise(n = n_distinct(broad_sample_compound, broad_sample_orf)) %>%
  knitr::kable()
n |
---|
466 |
# Number of distinct ORF-CRISPR pairs.
connections %>%
  summarise(n = n_distinct(broad_sample_orf, broad_sample_crispr)) %>%
  knitr::kable()
n |
---|
305 |
# Persist the compound / ORF / CRISPR connection table.
write_csv(connections, "JUMP-Target_compounds_crispr_orf_connections.csv")
# Distribution of compounds per ORF: first count the distinct compounds
# linked to each ORF, then count how many ORFs share each compound count.
connections %>%
  distinct(broad_sample_compound, broad_sample_orf) %>%
  count(broad_sample_orf, name = "n_compounds") %>%
  count(n_compounds, name = "n_orfs") %>%
  select(n_orfs, n_compounds) %>%
  knitr::kable()
n_orfs | n_compounds |
---|---|
6 | 1 |
83 | 2 |
34 | 3 |
19 | 4 |
8 | 5 |
4 | 6 |
1 | 7 |
2 | 8 |
1 | 9 |
2 | 10 |
The full list of targets is available here: https://github.com/jump-cellpainting/jump-cellpainting/blob/master/0.design-pilots/output/drug_target_samples.csv
These 7 compounds are not listed in that CSV, so we would need to fetch their target annotations from somewhere else, probably https://github.com/broadinstitute/lincs-cell-painting.