w28924461701 / cdc20b

0 stars 0 forks source link

correlation between promoter methylation and expression of interested genes in pancancer.R #8

Open w28924461701 opened 1 year ago

w28924461701 commented 1 year ago

corExprMeth <- NULL for (i in tumors) { message("--",i,"...")

读取只包含感兴趣基因的表达谱和甲基化数据

exprsubset <- read.table(paste0("TCGA",i,"_expr_subset.txt"),sep = "\t",row.names = 1,check.names = F,stringsAsFactors = F,header = T) methsubset <- read.table(paste0("TCGA",i,"_methy450_subset.txt"),sep = "\t",row.names = 1,check.names = F,stringsAsFactors = F,header = T) meth_subset<-t(meth_subset) meth_subset<-as.data.frame(meth_subset) colnames(meth_subset) <- substr(colnames(meth_subset),1,15)

comsam <- intersect(colnames(expr_subset),colnames(meth_subset)) # 获取相同的样本 tumsam <- comsam[substr(comsam,14,14) == "0"] # 仅获取肿瘤样本

expr_subset <- expr_subset[,tumsam] # 取出表达谱子集 meth_subset <- meth_subset[,tumsam] # 取出甲基化子集 rownames(meth_subset)<-'CDC20B' corTab <- NULL for (j in rownames(meth_subset)) { tmp1 <- as.numeric(meth_subset[j,]) # 甲基化beta值 tmp2 <- as.numeric(expr_subset[j,]) # 表达谱 cor.res <- cor.test(tmp1,tmp2, method = "spearman") # 启动子甲基化和表达谱的spearman相关性

corTab <- rbind.data.frame(corTab,
                           data.frame(gene = j,
                                      tumor = i,
                                      Correlation = ifelse(is.na(cor.res$estimate), 0, cor.res$estimate),
                                      Pvalue = ifelse(is.na(cor.res$p.value), 1, cor.res$p.value),
                                      stringsAsFactors = F),
                           stringsAsFactors = F)

} corExprMeth <- rbind.data.frame(corExprMeth, corTab, stringsAsFactors = F) }

write.table(corExprMeth, "TCGA_pancan_correlation_expr_meth_subset.txt",sep = "\t",row.names = F,col.names = T,quote = F)

设置颜色

blue <- "#4577FF" red <- "#C2151A" orange <- "#E45737" green <- "#6F8B35" darkblue <- "#303B7F" darkred <- "#D51113" yellow <- "#EECA1F"

        # 生成泡泡图
        corExprMeth$gene <- factor(corExprMeth$gene,
                                   levels = rev(c("CDC20B")))

        my_palette <- colorRampPalette(c(blue,"white",orange), alpha=TRUE)(n=128)
        ggplot(corExprMeth, aes(x=tumor,y=gene)) +
          geom_point(aes(size=-log10(Pvalue),color=Correlation)) +
          scale_color_gradientn('Correlation', 
                                colors=my_palette) +
          theme_bw() +
          theme(#panel.grid.minor = element_blank(), 
            #panel.grid.major = element_blank(),
            axis.text.x = element_text(angle = 45, size = 12, hjust = 0.3, vjust = 0.5, color = "black"),
            axis.text.y = element_text(size = 12, color = rep(c(red,blue),c(14,10))),
            axis.title = element_blank(),
            panel.border = element_rect(size = 0.7, linetype = "solid", colour = "black"),
            legend.position = "right",
            plot.margin = unit(c(1,1,1,1), "lines"))

        ggsave("Figure 1Einterested genes in pancancer.pdf", width = 8,height = 2)