Closed ixxmu closed 1 year ago
大神一句话,菜鸟跑半年。我不是大神,但我可以缩短你走弯路的半年~
就像歌儿唱的那样,如果你不知道该往哪儿走,就留在这学点生信好不好~
这里有豆豆和花花的学习历程,从新手到进阶,生信路上有你有我!
官网访问有障碍,直接去xena吧。
https://xenabrowser.net/datapages/?cohort=GTEX&removeHub=https%3A%2F%2Fxena.treehouse.gi.ucsc.edu%3A443
下载tpm和临床信息。
rm(list = ls())
dat = data.table::fread("gtex_RSEM_gene_tpm.gz",data.table = F)
dat[1:4,1:4]
## sample GTEX-S4Q7-0003-SM-3NM8M GTEX-QV31-1626-SM-2S1QC
## 1 ENSG00000242268.2 -3.4580 -9.9658
## 2 ENSG00000259041.1 -9.9658 -9.9658
## 3 ENSG00000270112.3 -3.6259 -2.1779
## 4 ENSG00000167578.16 4.5988 4.6294
## GTEX-13QIC-0011-R1a-SM-5O9CJ
## 1 -9.9658
## 2 -9.9658
## 3 -1.8314
## 4 6.4989
library(tidyverse)
exp = column_to_rownames(dat,"sample") %>% as.matrix()
rownames(exp) = rownames(exp) %>% str_split("\\.",simplify = T) %>% .[,1]
exp[1:4,1:4]
## GTEX-S4Q7-0003-SM-3NM8M GTEX-QV31-1626-SM-2S1QC
## ENSG00000242268 -3.4580 -9.9658
## ENSG00000259041 -9.9658 -9.9658
## ENSG00000270112 -3.6259 -2.1779
## ENSG00000167578 4.5988 4.6294
## GTEX-13QIC-0011-R1a-SM-5O9CJ GTEX-ZPCL-0126-SM-4WWC8
## ENSG00000242268 -9.9658 -9.9658
## ENSG00000259041 -9.9658 -9.9658
## ENSG00000270112 -1.8314 -9.9658
## ENSG00000167578 6.4989 5.5358
# 转换行名
library(AnnoProbe)
library(tinyarray)
an = annoGene(rownames(exp),ID_type = "ENSEMBL")
exp = trans_array(exp,ids = an,from = "ENSEMBL",to = "SYMBOL")
exp[1:4,1:4]
## GTEX-S4Q7-0003-SM-3NM8M GTEX-QV31-1626-SM-2S1QC
## DDX11L1 -5.0116 -9.9658
## WASH7P 4.4283 3.8370
## MIR6859-1 -9.9658 -9.9658
## MIR1302-2HG -9.9658 -9.9658
## GTEX-13QIC-0011-R1a-SM-5O9CJ GTEX-ZPCL-0126-SM-4WWC8
## DDX11L1 -9.9658 -9.9658
## WASH7P 3.5863 3.6793
## MIR6859-1 -9.9658 -9.9658
## MIR1302-2HG -9.9658 -9.9658
clinical = data.table::fread("GTEX_phenotype.gz")
table(clinical$`_primary_site`)
##
## <not provided> Adipose Tissue Adrenal Gland Bladder Blood
## 5 621 161 13 595
## Blood Vessel Bone Marrow Brain Breast Cervix Uteri
## 753 102 1426 221 11
## Colon Esophagus Fallopian Tube Heart Kidney
## 384 805 7 493 38
## Liver Lung Muscle Nerve Ovary
## 141 381 478 335 112
## Pancreas Pituitary Prostate Salivary Gland Skin
## 203 126 122 71 977
## Small Intestine Spleen Stomach Testis Thyroid
## 106 121 209 208 366
## Uterus Vagina
## 93 99
clinical = clinical[clinical$`_primary_site`!="<not provided>",]
colnames(clinical)[3] = "site"
s = intersect(colnames(exp),clinical$Sample)
clinical = clinical[match(s,clinical$Sample),]
exp = exp[,s]
identical(clinical$Sample,colnames(exp))
## [1] TRUE
exp[1:4,1:4]
## GTEX-S4Q7-0003-SM-3NM8M GTEX-QV31-1626-SM-2S1QC
## DDX11L1 -5.0116 -9.9658
## WASH7P 4.4283 3.8370
## MIR6859-1 -9.9658 -9.9658
## MIR1302-2HG -9.9658 -9.9658
## GTEX-13QIC-0011-R1a-SM-5O9CJ GTEX-ZPCL-0126-SM-4WWC8
## DDX11L1 -9.9658 -9.9658
## WASH7P 3.5863 3.6793
## MIR6859-1 -9.9658 -9.9658
## MIR1302-2HG -9.9658 -9.9658
library(dplyr)
#"METTL3","SETD2","TP53"
g = "METTL3"
pdat = cbind(gene = exp[g,],clinical[,c(1,3)])
library(tidyr)
pdat = drop_na(pdat,site)
su = group_by(pdat,site) %>%
summarise(a = median(gene)) %>%
arrange(desc(a))
pdat$site = factor(pdat$site,levels = su$site)
library(ggplot2)
library(RColorBrewer)
mypalette <- colorRampPalette(brewer.pal(8,"Set1"))
ggplot(pdat,aes(x = site,y = gene,fill = site))+
geom_boxplot()+
theme_bw()+
theme(axis.text.x = element_text(vjust = 1,hjust = 1,angle = 70),legend.position = "bottom")+
scale_fill_manual(values = mypalette(31))+
guides (fill=guide_legend (nrow=3, byrow=TRUE))
如果因为代码看不懂,而跟不上正文的节奏,可以来找我,系统学习。我的课程都是循环开课。下一期的时间,点进去咨询微信咯 产假结束,核酸尚阴,期待遇见2023年的你
https://mp.weixin.qq.com/s/6djEPfpbiWgyjiCafqvtFg