ixxmu / mp_duty

抓取网络文章到github issues保存
https://archives.duty-machine.now.sh/
120 stars 30 forks source link

复现Nature经典空转Spatial multiomic map of human myocardial infarction #4139

Closed ixxmu closed 11 months ago

ixxmu commented 11 months ago

https://mp.weixin.qq.com/s/EgHYBTXUUSjW3SUv_dd1kQ

ixxmu commented 11 months ago

复现Nature经典空转Spatial multiomic map of human myocardial infarction by 生信菜鸟团

个人感觉这篇文章在空转领域太经典了。在更高精度的空转技术出来之前,这篇技术非常值得学习。因文章本身代码太多,最近刚理清了他的代码思路,同时结合自己的数据,就开始着手复现。如果感兴趣可以跟我一起学习,估计这篇文章完整复现需要1年时间,敬请期待。

文章的复现,需要使用cell2location,所以要了解hpc高性能计算,学会使用bsub提交作业。或者你可以使用其他软件获取细胞丰度文件也行。最重要的是理解Misty这个r包的原理和使用。

复现分为两部分:

  1. 是关于本部分必须掌握的基础知识,如果你连基础知识都没掌握,就没必要看第二步了哦

  2. 是最终的代码部分,有的地方我会加上注释



    提醒:这部分的复现要求:1需要你已经制备好seurat对象的空转文件,里面包含矩阵和image才能正常运行哦。2你要学会progeny包的使用 3 如果有细胞丰度文件就更好。


01

purr是一个R语言中的包,旨在简化数据操作的过程,提供了一组功能强大且一致的工具,用于对列表数据进行操作,但它的并行处理是指每个输入同时处理,不涉及到多核计算。


这里是官方提到的参数及其含义:

https://purrr.tidyverse.org/reference/pmap.html

下面这个链接可以加深我们对r中向量 数据框 列表的理解:

https://dcl-prog.stanford.edu/data-structures.html

02

options(warn = -1)
#request 2.libPaths(c( "/home/data/t040413/R/x86_64-pc-linux-gnu-library/4.2", "/home/data/t040413/R/yll/usr/local/lib/R/site-library", "/home/data/refdir/Rlib/", "/usr/local/lib/R/library"))
#https://github.com/saezlab/misty_pipelines/blob/0d23e52524375171bd26182892a18fc55b258818/visium/2_run_misty_ppline.R#L30#https://github.com/saezlab/visium_heart#https://saezlab.github.io/mistyR/articles/# install.packages("remotes")#remotes::install_github("saezlab/mistyR")library(conflicted)conflicts_prefer(dplyr::filter())conflicts_prefer(dplyr::lag())library(tidyverse)library(Seurat)library(progeny)
library(conflicted)conflicts_prefer(dplyr::filter())conflicts_prefer(dplyr::lag())library(conflicted)conflict_scout()
print(getwd())dir.create("~/silicosis/spatial/prcessed_visum_for_progeny/",recursive = TRUE)setwd("~/silicosis/spatial/prcessed_visum_for_progeny/")




#' @param visium_slide: Seurat object with SCT assay#' @param species: human or mouse, will extract regulons based on this#' @param top: number of genes used in footprint#' @return A Seurat object with pathway activities in progeny assayadd_path_activities <- function(visium_slide, species = "human", top = 500, assay = "RNA"){ if(species == "mouse"){ model <- progeny::getModel(organism = "Mouse", top = top) common_genes <- intersect(rownames(GetAssayData(visium_slide, assay = assay)), rownames(model)) progeny_scores <- (t(model)[, common_genes]) %*% (GetAssayData(visium_slide, assay = assay)[common_genes, ]) # Here we scale by feature progeny_scores <- scale(t(as.matrix(progeny_scores))) visium_slide[['progeny']] <- CreateAssayObject(counts = t(progeny_scores))visium_slide@assays
# visium_slide <- readRDS("./processed_visium/SiO2_56.rds")# marker.list=read.table("~/silicosis/markers_for_each_silicosis.csv",header = TRUE,sep = ",") # marker.list=read.table("/home/data/t040413/silicosis/data/tabula_scRNAseq/integration_with_sc_silicosis/markers__foreach_10x.csv",header = TRUE,sep = ",") # marker.list=read.csv("/home/data/t040413/silicosis/markers_foreach_silico_fibro.csv") # load("~/silicosis/silicos_fibro_special_grem1_inflamm.rds" )# load("~/silicosis/sc_marker.list_macro.rds")# #markers_for_Each_macro=read.csv("~/silicosis/markers_for_Each_macro_silicosi.csv")# # marker.list=read.csv("/home/data/t040413/silicosis/marker.list_silicosis_fibro_AM3_mappedbacked.csv")# # # head(marker.list)# sc_marker.list=list()# # for (each in unique(marker.list$cluster)) {# sc_marker.list[[each]]=marker.list[marker.list$cluster==each & marker.list$avg_log2FC>1 & marker.list$p_val_adj<0.05,"gene"]# }# # # sc_marker.list=c(# sc_marker.list_fibro, # sc_marker.list[!c( names(sc_marker.list) %in% names(sc_marker.list_macro) )] ,# sc_marker.list_macro# )# # # head(visium_slide@meta.data)# # assay="Spatial"# visium_slide=AddModuleScore(visium_slide,features =sc_marker.list ,assay = assay)# # colnames(visium_slide@meta.data)# visium_slide@meta.data[ ,(ncol(visium_slide@meta.data)-length(sc_marker.list)+1 ): ncol(visium_slide@meta.data)] %>%head()# sc_marker_names=names(sc_marker.list)# # colnames(visium_slide@meta.data)[ (ncol(visium_slide@meta.data)-length(sc_marker.list)+1 ): ncol(visium_slide@meta.data)] =sc_marker_names# module_score=FetchData(visium_slide,vars = sc_marker_names,)# print(dim(module_score))# dim(visium_slide)# # head(module_score)[,1:9]# # Here we scale by feature# module_score <- scale(as.matrix(module_score))# visium_slide@[['module_score']] <- CreateAssayObject(counts = t(module_score))# visium_slide[['Spatial']] # visium_slide <- readRDS("./processed_visium/SiO2_56.rds") visium_slide@meta.data %>%head() visium_slide$aling_with_spatial= paste(visium_slide$orig.ident,sep = "_", gsub(pattern = "_[1-4]",replacement = "",x = colnames(visium_slide)) ) visium_slide@meta.data %>%head() cell_abundance=read.csv("/home/data/t040413/silicosis/spatial_c2l_results/silicosismappedback_noinflammatory/q05_cell_abundance_w_sf.csv" ,header = TRUE,row.names = "spot_id") head(cell_abundance) table(rownames(cell_abundance) %in% visium_slide$aling_with_spatial) # # Here we scale by spot cell_abundance_scales= apply(cell_abundance, 2, function(x) (x - min(x)) / (max(x) - min(x)))########为了让不同点之间具有可比性,每列scale head(cell_abundance_scales) cell_abundance_scales["SiO2_56_AAACAAGTATCTCCCA-1",] tmp= cell_abundance[rownames( cell_abundance) %in% visium_slide$aling_with_spatial,] head(tmp)[,1:9] head(colnames(visium_slide)) head(visium_slide@meta.data) rownames(tmp)=colnames(visium_slide) visium_slide= AddMetaData(visium_slide,metadata = tmp) head(visium_slide@meta.data) tmp2=FetchData(visium_slide,vars = colnames(tmp))

visium_slide@assays[["cell_abundance_scaled"]]= CreateSeuratObject(counts = t( visium_slide@meta.data[,colnames(tmp)] )) %>% GetAssay(assay="RNA")

visium_slide@assays visium_slide[["cell_abundance_scaled"]]

}else if(species == "human"){ model <- progeny::getModel(organism = "Human", top = top) common_genes <- intersect(rownames(GetAssayData(visium_slide, assay = assay)), rownames(model)) progeny_scores <- (t(model)[, common_genes]) %*% (GetAssayData(visium_slide, assay = assay)[common_genes, ]) # Here we scale by feature progeny_scores <- scale(t(as.matrix(progeny_scores))) visium_slide[['progeny']] <- CreateAssayObject(counts = t(progeny_scores)) } return(visium_slide)}



#setwd("../")print(getwd())## Process all data
dir.create("processed_visium")dir.create("processed_visium_3")

param_df <- tibble("sample_name" = list.dirs("./data",full.names = F, recursive = F), "data_dir" = list.dirs("./data",full.names = T, recursive = F), "sample_file_self"=normalizePath(list.files("./data/", recursive = TRUE, full.names = TRUE) ) ) %>% dplyr::mutate("out_folder" = paste0("./processed_visium_3/", sample_name, ".rds"))param_df print(getwd())
#normalizePath(list.files(path = file.path("./data",sample_name), recursive = TRUE, full.names = TRUE) )# fs::dir_tree() # fs::dir_ls("data")

purrr::walk2(param_df$data_dir, param_df$out_folder, function(dir_name,out_dir_name) {
# dir_name="./data/NS_56" # samplefile_self="/home/data/t040413/silicosis/spatial/prcessed_visum_for_progeny/data/NS_56/NS_56.rds" # out_dir_name="./processed_visium/NS_56.rds"
print(dir_name) sample_name <- gsub("./data/", "", dir_name) sample_file_self= normalizePath( list.files( path = file.path("./data",sample_name), recursive = TRUE, full.names = TRUE) )
visium_slide = readRDS(paste0(sample_file_self)) %>% SCTransform(. , assay = "Spatial", verbose = F) %>% NormalizeData(., normalization.method = 'LogNormalize', scale.factor = 10000, verbose = FALSE) %>%
add_path_activities(., species = "mouse",#"human", top = 1000, assay = "SCT")


tmplist= visium_slide@images[[sample_name]] visium_slide@images=list() visium_slide@images= list(slice1=tmplist)
# str(visium_slide) # str(visium_slide@images[[sample_name]]) # saveRDS(visium_slide, file = out_dir_name) # ##########1------- # DefaultAssay(visium_slide) <- 'module_score' # # sample_name="sio2_56" # dir.create("~/silicosis/spatial/prcessed_visum_for_progeny/results/module_spatial_plts/",recursive = TRUE) # pdf(paste0("~/silicosis/spatial/prcessed_visum_for_progeny/results/module_spatial_plts/", sample_name, ".pdf"), height = 12, width = 10) # # all_paths <- SpatialFeaturePlot(visium_slide, # features =rownames(visium_slide), # ncol = 4, # stroke = 0, # max.cutoff = "q99")+ggtitle("") # plot(all_paths) # # dev.off() # pdf(paste0("~/silicosis/spatial/prcessed_visum_for_progeny/results/module_spatial_plts/", sample_name, "_2.pdf"), height = 12, width = 10) # for (each in rownames(visium_slide)) { # p=SpatialFeaturePlot(visium_slide, # features =each, # ncol = 4, # stroke = 0, # max.cutoff = "q99") # plot(p) # # # } # dev.off() # # ##########2----- DefaultAssay(visium_slide) <- "progeny"
dir.create("./results/progeny_spatial_plts/",recursive = TRUE) pdf(paste0("./results/progeny_spatial_plts/", sample_name, ".pdf"), height = 12, width = 10)
all_paths <- SpatialFeaturePlot(visium_slide, features = rownames(visium_slide), ncol = 4, stroke = 0, max.cutoff = "q99") plot(all_paths)
dev.off() print(paste0(getwd(),sample_name,"=====sample_=========done------")) ##########3---- DefaultAssay(visium_slide) <- "cell_abundance_scaled" dir.create("./results/progeny_spatial_plts/cell_abundance_scaled",recursive = TRUE) pdf(paste0("./results/progeny_spatial_plts/cell_abundance_scaled", sample_name, "_.pdf"), height = 12, width = 10) all_paths <- SpatialFeaturePlot(visium_slide, features = rownames(visium_slide), ncol = 2, stroke = 0, max.cutoff = "q99") plot(all_paths) dev.off() ######3.2---- pdf(paste0("~/silicosis/spatial/prcessed_visum_for_progeny/results/module_spatial_plts/", sample_name, "_3.pdf"), height =12, width = 10) for (each in rownames(visium_slide)) { p=SpatialFeaturePlot(visium_slide, features =each, ncol = 2, stroke = 0, max.cutoff = "q99") plot(p)

p=SpatialFeaturePlot(visium_slide, features =each, max.cutoff = "q99") plot(p) } dev.off() })