rargelaguet / scnmt_gastrulation

scNMT-seq gastrulation
41 stars 12 forks source link

How to calculate the fraction and the total number of differentially methylated or accessible loci ? #5

Open JingGuo1997 opened 1 year ago

JingGuo1997 commented 1 year ago

Hi, Ricard @rargelaguet I am reproducing Extended Data Fig. 6a of the article "Multi-omics profiling of mouse gastrulation at single-cell resolution" . I have currently downloaded Supplementary Table 3/4(Differential methylation/ accessibility analysis between germ layers), but my results are different from those in the article. I would like to inquire about the dataset can be used to reproduce Extended Data Fig. 6a.

right is my resulte:

image

Thanks for your help!

JingGuo1997 commented 1 year ago

Hi, Ricard @rargelaguet I am reproducing Extended Data Fig. 6a of the article "Multi-omics profiling of mouse gastrulation at single-cell resolution" . I have currently downloaded Supplementary Table 3/4(Differential methylation/ accessibility analysis between germ layers), but my results are different from those in the article. I would like to inquire about the dataset can be used to reproduce Extended Data Fig. 6a.

right is my resulte: image Thanks for your help!

I solved this problem by downloading :ftp://ftp.ebi.ac.uk/pub/databases/scnmt_gastrulation/scnmt_gastrulation.tar.gz.

JingGuo1997 commented 1 year ago

Hi, Ricard @rargelaguet. I have been trying to reproduce the results mentioned in the article by downloading the scnmt_gastrulation.tar.gz file and using the following scripts: metaccrna/differential/load_settings.R, load_data.R, and plot_differential_barplots.Rmd. However, I have not been able to complete replicate the results. Could you please help me identify where the problem might be? This is rather important to me, as I'm currently delving into the world of bioinformatics. Many thanks for any assistance you can provide!

right is my resulte:

image

and this is my code:


#####################
## Define settings ##
#####################

## Define I/O ##

io <- list()

  io$basedir <- "../.."

io$sample.metadata <- paste0(io$basedir,"/sample_metadata.txt")
io$met.dir <- paste0(io$basedir,"/met/feature_level")
io$acc.dir <- paste0(io$basedir,"/acc/feature_level")
io$met.stats <- paste0(io$basedir,"/met/results/stats/sample_stats.txt")
io$acc.stats <- paste0(io$basedir,"/acc/results/stats/sample_stats.txt")
io$annos_dir  <- paste0(io$basedir, "/features/genomic_contexts")

# Folders with the differential analysis results
io$diff.met <- paste0(io$basedir,"/met/results/differential")
io$diff.acc <- paste0(io$basedir,"/acc/results/differential")
io$diff.rna <- paste0(io$basedir,"/rna/results/differential")

## Define options ##
opts <- list()

# Define genomic contexts for methylation
opts$met.annos <- c(
  # "genebody"="Gene body",
  "prom_2000_2000"="Promoters",
  # "prom_2000_2000_cgi"="CGI promoters",
  # "prom_2000_2000_noncgi"="non-CGI promoters",
  "H3K27ac_distal_E7.5_Mes_intersect12"="Mesoderm enhancers",
  "H3K27ac_distal_E7.5_End_intersect12"="Endoderm enhancers",
  "H3K27ac_distal_E7.5_Ect_intersect12"="Ectoderm enhancers"
  # "H3K4me3_E7.5_Mes"="Mes- H3K4me3",
  # "H3K4me3_E7.5_End"="End- H3K4me3",
  # "H3K4me3_E7.5_Ect"="Ect- H3K4me3"
)

# Define genomic contexts for accessibility
opts$acc.annos <- c(
  # "genebody"="Gene body",
  "prom_2000_2000"="Promoters",
  # "prom_2000_2000_cgi"="CGI promoters",
  # "prom_2000_2000_noncgi"="non-CGI promoters",
  "H3K27ac_distal_E7.5_Mes_intersect12"="Mesoderm enhancers",
  "H3K27ac_distal_E7.5_End_intersect12"="Endoderm enhancers",
  "H3K27ac_distal_E7.5_Ect_intersect12"="Ectoderm enhancers"
  # "H3K4me3_E7.5_Mes"="Mes- H3K4me3",
  # "H3K4me3_E7.5_End"="End- H3K4me3",
  # "H3K4me3_E7.5_Ect"="Ect- H3K4me3"
)

###############################################
## Load differential DNA methylation results ##
###############################################

if (!is.null(io$diff.met)) {

  # Mesoderm-specific
  if (opts$diff.type==1) {
    diff.met.mes <- lapply(names(opts$met.annos), function(x)
    # diff.met.mes <- lapply(list("H3K27ac_distal_E7.5_Mes_intersect12"), function(x)
      fread(sprintf("%s/E7.5Mesoderm_vs_E7.5EndodermEctoderm_%s.txt.gz",io$diff.met,x))
    ) %>% rbindlist() %>% .[,lineage:="Mesoderm"] %>%
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.met.diff]

    # Ectoderm-specific
    diff.met.ect <- lapply(names(opts$met.annos), function(x)
    # diff.met.ect <- lapply(list("H3K27ac_distal_E7.5_Ect_intersect12"), function(x)
      fread(sprintf("%s/E7.5Ectoderm_vs_E7.5MesodermEndoderm_%s.txt.gz",io$diff.met,x))
    ) %>% rbindlist() %>% .[,lineage:="Ectoderm"] %>%
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.met.diff]

    # Endoderm-specific
    diff.met.end <- lapply(names(opts$met.annos), function(x)
    # diff.met.end <- lapply(list("H3K27ac_distal_E7.5_End_intersect12"), function(x)
      fread(sprintf("%s/E7.5Endoderm_vs_E7.5MesodermEctoderm_%s.txt.gz",io$diff.met,x))
    ) %>% rbindlist() %>% .[,lineage:="Endoderm"] %>%
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.met.diff]
  }

  if (opts$diff.type==2) {

    # Mesoderm-specific
    diff.met.mes <- lapply(names(opts$met.annos), function(x)
      rbind(
        fread(sprintf("%s/E7.5Mesoderm_vs_E7.5Endoderm_%s.txt.gz",io$diff.met,x)) %>% .[,lineage2:="Endoderm"],
        fread(sprintf("%s/E7.5Mesoderm_vs_E7.5Ectoderm_%s.txt.gz",io$diff.met,x)) %>% .[,lineage2:="Ectoderm"]
      )) %>% rbindlist %>% .[,lineage1:="Mesoderm"] %>% 
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.met.diff] %>%
      data.table::dcast(id+anno+lineage1~lineage2, value.var=c("diff","padj_fdr","sig")) %>%
      .[,sig:=sig_Endoderm==T & sig_Ectoderm==T & sign(diff_Ectoderm)==sign(diff_Endoderm)] %>%
      .[,diff:=(diff_Endoderm+diff_Ectoderm)/2] %>%
      .[,c("id","anno","lineage1","diff","sig")] %>% setnames("lineage1","lineage")

    # Ectoderm-specific
    diff.met.ect <- lapply(names(opts$met.annos), function(x)
      rbind(
        fread(sprintf("%s/E7.5Ectoderm_vs_E7.5Endoderm_%s.txt.gz",io$diff.met,x)) %>% .[,lineage2:="Endoderm"],
        fread(sprintf("%s/E7.5Ectoderm_vs_E7.5Mesoderm_%s.txt.gz",io$diff.met,x)) %>% .[,lineage2:="Mesoderm"]
      )) %>% rbindlist() %>% .[,lineage1:="Ectoderm"] %>% 
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.met.diff] %>%
      data.table::dcast(id+anno+lineage1~lineage2, value.var=c("diff","padj_fdr","sig")) %>%
      .[,sig:=sig_Endoderm==T & sig_Mesoderm==T & sign(diff_Endoderm)==sign(diff_Mesoderm)] %>%
      .[,diff:=(diff_Endoderm+diff_Mesoderm)/2] %>%
      .[,c("id","anno","lineage1","diff","sig")] %>% setnames("lineage1","lineage")

    # Endoderm-specific
    diff.met.end <- lapply(names(opts$met.annos), function(x)
      rbind(
        fread(sprintf("%s/E7.5Endoderm_vs_E7.5Mesoderm_%s.txt.gz",io$diff.met,x)) %>% .[,lineage2:="Mesoderm"],
        fread(sprintf("%s/E7.5Endoderm_vs_E7.5Ectoderm_%s.txt.gz",io$diff.met,x)) %>% .[,lineage2:="Ectoderm"]
      )) %>% rbindlist() %>% .[,lineage1:="Endoderm"] %>% 
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.met.diff] %>%
      data.table::dcast(id+anno+lineage1~lineage2, value.var=c("diff","padj_fdr","sig")) %>%
      .[,sig:=sig_Mesoderm==T & sig_Ectoderm==T & sign(diff_Ectoderm)==sign(diff_Mesoderm)] %>%
      .[,diff:=(diff_Mesoderm+diff_Ectoderm)/2] %>%
      .[,c("id","anno","lineage1","diff","sig")] %>% setnames("lineage1","lineage")
    }
    if (opts$diff.type==3) {

    # Mesoderm-specific
    diff.met.mes <- lapply(list("H3K27ac_distal_E7.5_Mes_intersect12"), function(x)
      rbind(
        fread(sprintf("%s/E7.5Mesoderm_vs_E7.5Endoderm_%s.txt.gz",io$diff.met,x)) %>% .[,lineage2:="Endoderm"],
        fread(sprintf("%s/E7.5Mesoderm_vs_E7.5Ectoderm_%s.txt.gz",io$diff.met,x)) %>% .[,lineage2:="Ectoderm"]
      )) %>% rbindlist %>% .[,lineage1:="Mesoderm"] %>% 
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.met.diff] %>%
      data.table::dcast(id+anno+lineage1~lineage2, value.var=c("diff","padj_fdr","sig")) %>%
      .[,sig:=sig_Endoderm==T & sig_Ectoderm==T & sign(diff_Ectoderm)==sign(diff_Endoderm)] %>%
      .[,diff:=(diff_Endoderm+diff_Ectoderm)/2] %>%
      .[,c("id","anno","lineage1","diff","sig")] %>% setnames("lineage1","lineage")

    # Ectoderm-specific
    diff.met.ect <- lapply(list("H3K27ac_distal_E7.5_Ect_intersect12"), function(x)
      rbind(
        fread(sprintf("%s/E7.5Ectoderm_vs_E7.5Endoderm_%s.txt.gz",io$diff.met,x)) %>% .[,lineage2:="Endoderm"],
        fread(sprintf("%s/E7.5Ectoderm_vs_E7.5Mesoderm_%s.txt.gz",io$diff.met,x)) %>% .[,lineage2:="Mesoderm"]
      )) %>% rbindlist() %>% .[,lineage1:="Ectoderm"] %>% 
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.met.diff] %>%
      data.table::dcast(id+anno+lineage1~lineage2, value.var=c("diff","padj_fdr","sig")) %>%
      .[,sig:=sig_Endoderm==T & sig_Mesoderm==T & sign(diff_Endoderm)==sign(diff_Mesoderm)] %>%
      .[,diff:=(diff_Endoderm+diff_Mesoderm)/2] %>%
      .[,c("id","anno","lineage1","diff","sig")] %>% setnames("lineage1","lineage")

    # Endoderm-specific
    diff.met.end <- lapply(list("H3K27ac_distal_E7.5_End_intersect12"), function(x)
      rbind(
        fread(sprintf("%s/E7.5Endoderm_vs_E7.5Mesoderm_%s.txt.gz",io$diff.met,x)) %>% .[,lineage2:="Mesoderm"],
        fread(sprintf("%s/E7.5Endoderm_vs_E7.5Ectoderm_%s.txt.gz",io$diff.met,x)) %>% .[,lineage2:="Ectoderm"]
      )) %>% rbindlist() %>% .[,lineage1:="Endoderm"] %>% 
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.met.diff] %>%
      data.table::dcast(id+anno+lineage1~lineage2, value.var=c("diff","padj_fdr","sig")) %>%
      .[,sig:=sig_Mesoderm==T & sig_Ectoderm==T & sign(diff_Ectoderm)==sign(diff_Mesoderm)] %>%
      .[,diff:=(diff_Mesoderm+diff_Ectoderm)/2] %>%
      .[,c("id","anno","lineage1","diff","sig")] %>% setnames("lineage1","lineage")
    }

  diff.met <- do.call("rbind", list(diff.met.mes,diff.met.end,diff.met.ect))
  #rm(diff.met.mes,diff.met.ect,diff.met.end)
}

######################################################
## Load differential chromatin accessiblity results ##
######################################################

if (!is.null(io$diff.acc)) {

  if (opts$diff.type==1) {

    # Mesoderm-specific
    diff.acc.mes <- lapply(names(opts$acc.annos), function(x)
      fread(sprintf("%s/E7.5Mesoderm_vs_E7.5EndodermEctoderm_%s.txt.gz",io$diff.acc,x))
    ) %>% rbindlist() %>% .[,lineage:="Mesoderm"] %>%
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.acc.diff]

    # Ectoderm-specific
    diff.acc.ect <- lapply(names(opts$acc.annos), function(x)
      fread(sprintf("%s/E7.5Ectoderm_vs_E7.5MesodermEndoderm_%s.txt.gz",io$diff.acc,x))
    ) %>% rbindlist() %>% .[,lineage:="Ectoderm"] %>%
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.acc.diff]

    # Endoderm-specific
    diff.acc.end <- lapply(names(opts$acc.annos), function(x)
      fread(sprintf("%s/E7.5Endoderm_vs_E7.5MesodermEctoderm_%s.txt.gz",io$diff.acc,x))
    ) %>% rbindlist() %>% .[,lineage:="Endoderm"] %>%
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.acc.diff]

  }

  if (opts$diff.type==2) {
    # Mesoderm-specific
    diff.acc.mes <- lapply(names(opts$acc.annos), function(x)
    # diff.acc.mes <- lapply(list("H3K27ac_distal_E7.5_Mes_intersect12"), function(x)
      rbind(
        fread(sprintf("%s/E7.5Mesoderm_vs_E7.5Endoderm_%s.txt.gz",io$diff.acc,x)) %>% .[,lineage2:="Endoderm"],
        fread(sprintf("%s/E7.5Mesoderm_vs_E7.5Ectoderm_%s.txt.gz",io$diff.acc,x)) %>% .[,lineage2:="Ectoderm"]
      )) %>% rbindlist() %>% .[,lineage1:="Mesoderm"] %>% 
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.acc.diff] %>%
      data.table::dcast(id+anno+lineage1~lineage2, value.var=c("diff","padj_fdr","sig")) %>%
      .[,sig:=sig_Endoderm==T & sig_Ectoderm==T & sign(diff_Endoderm)==sign(diff_Ectoderm)] %>%
      .[,diff:=(diff_Endoderm+diff_Ectoderm)/2] %>%
      .[,c("id","anno","lineage1","diff","sig")] %>% setnames("lineage1","lineage")

    # Ectoderm-specific
    # diff.acc.ect <- lapply(list("H3K27ac_distal_E7.5_Ect_intersect12"), function(x)
    diff.acc.ect <- lapply(names(opts$acc.annos), function(x)
      rbind(
        fread(sprintf("%s/E7.5Ectoderm_vs_E7.5Endoderm_%s.txt.gz",io$diff.acc,x)) %>% .[,lineage2:="Endoderm"],
        fread(sprintf("%s/E7.5Ectoderm_vs_E7.5Mesoderm_%s.txt.gz",io$diff.acc,x)) %>% .[,lineage2:="Mesoderm"]
      )) %>% rbindlist() %>% .[,lineage1:="Ectoderm"] %>% 
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.acc.diff] %>%
      data.table::dcast(id+anno+lineage1~lineage2, value.var=c("diff","padj_fdr","sig")) %>%
      .[,sig:=sig_Endoderm==T & sig_Mesoderm==T & sign(diff_Endoderm)==sign(diff_Mesoderm)] %>%
      .[,diff:=(diff_Endoderm+diff_Mesoderm)/2] %>%
      .[,c("id","anno","lineage1","diff","sig")] %>% setnames("lineage1","lineage")

    # Endoderm-specific
    # diff.acc.end <- lapply(list("H3K27ac_distal_E7.5_End_intersect12"), function(x)
    diff.acc.end <- lapply(names(opts$acc.annos), function(x)
      rbind(
        fread(sprintf("%s/E7.5Endoderm_vs_E7.5Mesoderm_%s.txt.gz",io$diff.acc,x)) %>% .[,lineage2:="Mesoderm"],
        fread(sprintf("%s/E7.5Endoderm_vs_E7.5Ectoderm_%s.txt.gz",io$diff.acc,x)) %>% .[,lineage2:="Ectoderm"]
      )) %>% rbindlist() %>% .[,lineage1:="Endoderm"] %>% 
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.acc.diff] %>%
      data.table::dcast(id+anno+lineage1~lineage2, value.var=c("diff","padj_fdr","sig")) %>%
      .[,sig:=sig_Mesoderm==T & sig_Ectoderm==T & sign(diff_Mesoderm)==sign(diff_Ectoderm)] %>%
      .[,diff:=(diff_Mesoderm+diff_Ectoderm)/2] %>%
      .[,c("id","anno","lineage1","diff","sig")] %>% setnames("lineage1","lineage")

  }

  if (opts$diff.type==3) {

    # Mesoderm-specific
    diff.acc.mes <- lapply(list("H3K27ac_distal_E7.5_Mes_intersect12"), function(x)
      rbind(
        fread(sprintf("%s/E7.5Mesoderm_vs_E7.5Endoderm_%s.txt.gz",io$diff.acc,x)) %>% .[,lineage2:="Endoderm"],
        fread(sprintf("%s/E7.5Mesoderm_vs_E7.5Ectoderm_%s.txt.gz",io$diff.acc,x)) %>% .[,lineage2:="Ectoderm"]
      )) %>% rbindlist() %>% .[,lineage1:="Mesoderm"] %>% 
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.acc.diff] %>%
      data.table::dcast(id+anno+lineage1~lineage2, value.var=c("diff","padj_fdr","sig")) %>%
      .[,sig:=sig_Endoderm==T & sig_Ectoderm==T & sign(diff_Endoderm)==sign(diff_Ectoderm)] %>%
      .[,diff:=(diff_Endoderm+diff_Ectoderm)/2] %>%
      .[,c("id","anno","lineage1","diff","sig")] %>% setnames("lineage1","lineage")

    # Ectoderm-specific
    diff.acc.ect <- lapply(list("H3K27ac_distal_E7.5_Ect_intersect12"), function(x)
      rbind(
        fread(sprintf("%s/E7.5Ectoderm_vs_E7.5Endoderm_%s.txt.gz",io$diff.acc,x)) %>% .[,lineage2:="Endoderm"],
        fread(sprintf("%s/E7.5Ectoderm_vs_E7.5Mesoderm_%s.txt.gz",io$diff.acc,x)) %>% .[,lineage2:="Mesoderm"]
      )) %>% rbindlist() %>% .[,lineage1:="Ectoderm"] %>% 
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.acc.diff] %>%
      data.table::dcast(id+anno+lineage1~lineage2, value.var=c("diff","padj_fdr","sig")) %>%
      .[,sig:=sig_Endoderm==T & sig_Mesoderm==T & sign(diff_Endoderm)==sign(diff_Mesoderm)] %>%
      .[,diff:=(diff_Endoderm+diff_Mesoderm)/2] %>%
      .[,c("id","anno","lineage1","diff","sig")] %>% setnames("lineage1","lineage")

    # Endoderm-specific
    diff.acc.end <- lapply(list("H3K27ac_distal_E7.5_End_intersect12"), function(x)
      rbind(
        fread(sprintf("%s/E7.5Endoderm_vs_E7.5Mesoderm_%s.txt.gz",io$diff.acc,x)) %>% .[,lineage2:="Mesoderm"],
        fread(sprintf("%s/E7.5Endoderm_vs_E7.5Ectoderm_%s.txt.gz",io$diff.acc,x)) %>% .[,lineage2:="Ectoderm"]
      )) %>% rbindlist() %>% .[,lineage1:="Endoderm"] %>% 
      .[,sig:=padj_fdr<opts$min.fdr & abs(diff)>opts$min.acc.diff] %>%
      data.table::dcast(id+anno+lineage1~lineage2, value.var=c("diff","padj_fdr","sig")) %>%
      .[,sig:=sig_Mesoderm==T & sig_Ectoderm==T & sign(diff_Mesoderm)==sign(diff_Ectoderm)] %>%
      .[,diff:=(diff_Mesoderm+diff_Ectoderm)/2] %>%
      .[,c("id","anno","lineage1","diff","sig")] %>% setnames("lineage1","lineage")
  }

  diff.acc <- do.call("rbind", list(diff.acc.mes,diff.acc.end,diff.acc.ect))
  #rm(diff.acc.mes,diff.acc.ect,diff.acc.end)
}

library(data.table)
library(purrr)
library(ggplot2)

source("load_settings.R")

opts$annos <- c(
  "prom_2000_2000"="Promoters",
  "H3K27ac_distal_E7.5_Mes_intersect12"="Mes- enhancers",
  "H3K27ac_distal_E7.5_End_intersect12"="End- enhancers",
  "H3K27ac_distal_E7.5_Ect_intersect12"="Ect- enhancers",
  "H3K4me3_E7.5_Mes"="Mes- H3K4me3",
  "H3K4me3_E7.5_End"="End- H3K4me3",
  "H3K4me3_E7.5_Ect"="Ect- H3K4me3"
)
opts$met.annos <- opts$acc.annos <- opts$annos

opts$diff.type <- 2
opts$min.fdr <- 0.1
opts$min.acc.diff <- 10
opts$min.met.diff <- 10

source("load_data.R")

diff.metacc <- rbind(
  diff.met[,type:="met"] %>% .[,c("id","anno","diff","sig","lineage","type")], 
  diff.acc[,type:="acc"] %>% .[,c("id","anno","diff","sig","lineage","type")] 
) %>% dcast(id+lineage+anno~type, value.var=c("diff","sig")) %>% .[complete.cases(.)] %>%
  .[,anno:=stringr::str_replace_all(anno,opts$annos)]

tmp <- diff.metacc %>%
  .[,.(Nmet=mean(sig_met,na.rm=T), Nacc=mean(sig_acc,na.rm=T)), by=c("anno","lineage")] %>%
  melt(id.vars=c("anno","lineage"), variable.name="assay", value.name="N")```