Closed ksrinivasprabhu1999 closed 4 years ago
I have this file mtx_ACCRNAseq2.rda saved in dir. Please let me know how I can fix it and move further. Because running this is giving me an error - Error in source("~/.active-rstudio-document") : attempt to use zero-length variable name. Please help. Thank you.
library(knitr) opts_chunk$set(cache.path='cache/', fig.path='img/', cache=F, tidy=T, fig.keep='high', echo=F, dpi=100, warnings=F, message=F, comment=NA, warning=F, results='as.is', fig.width = 10, fig.height = 6) #out.width=700, library(pander) panderOptions('table.split.table', Inf) set.seed(1) library(dplyr) options(stringsAsFactors = FALSE) library(openxlsx) library(dplyr) library(ggplot2) load_data <- function(disease = cancer, data.type = data.type, type = type, data_dir = data_dir, force_reload = FALSE) { FILE = paste0(datadir, "/mtx", disease, "", data.type, "", type, ".rda") # R object with data if (all(file.exists(FILE), !(force_reload))) {
load(file = FILE)
} else {
mtx <- getTCGA(disease = disease, data.type = data.type, type = type, clinical = TRUE)
save(file = FILE, list = c("mtx")) # Save it
} return(mtx) } save_res <- function(res, fileName = fileName, wb = wb, sheetName = "KEGG") { if (nrow(res) > 0) { openxlsx::addWorksheet(wb = wb, sheetName = sheetName) openxlsx::writeData(wb, res, sheet = sheetName) openxlsx::saveWorkbook(wb, fileName, overwrite = TRUE) } } save_enrichr <- function(up.genes = up.genes, dn.genes = NULL, databases = "KEGG_2016", fdr.cutoff = 1, fileNameOut = NULL, wb = NULL) { print(paste("Running", databases, "analysis", sep = " ")) if (is.null(dn.genes)) { res.kegg <- enrichGeneList(up.genes, databases = databases, fdr.cutoff = 1) } else { res.kegg <- enrichFullGeneList(up.genes, dn.genes, databases = databases, fdr.cutoff = 1) }
res.kegg$pval <- formatC(res.kegg$pval, digits = 3, format = "e") res.kegg$qval <- formatC(res.kegg$qval, digits = 3, format = "e") if (!is.null(fileNameOut)) { if (nchar(databases) > 30) databases <- paste0(substr(databases, 1, 20), "_", substr(databases, nchar(databases) - 8, nchar(databases))) # If a database is longer that 30 characters, keep first 20 and last 10 characters save_res(res.kegg, fileNameOut, wb = wb, sheetName = databases) } pause_sec <- round(runif(1, min = 1, max = 10)) Sys.sleep(pause_sec) return(res.kegg) }
summarySE <- function(data=NULL, measurevar, groupvars=NULL, na.rm=FALSE, conf.interval=.95, .drop=TRUE) { library(plyr)
length2 <- function (x, na.rm=FALSE) { if (na.rm) sum(!is.na(x)) else length(x) }
datac <- ddply(data, groupvars, .drop=.drop, .fun = function(xx, col) { c(N = length2(xx[[col]], na.rm=na.rm), mean = mean (xx[[col]], na.rm=na.rm), sd = sd (xx[[col]], na.rm=na.rm) ) }, measurevar )
datac <- rename(datac, c("mean" = measurevar)) datac$se <- datac$sd / sqrt(datac$N) # Calculate standard error of the mean
ciMult <- qt(conf.interval/2 + .5, datac$N-1) datac$ci <- datac$se * ciMult return(datac) }
system("mkdir -p data")
# system("mkdir -p results")
# Path where the downloaded data is stored
data_dir = "E:/Data/GenomeRunner/TCGAsurvival/data" # Windows
# Selected genes
selected_genes <- c("MIA", "FOXA1")
data.type = "RNASeq2"; type = ""
# All cancers with RNASeq2 data
cancer_RNASeq2 = c("ACC", "BLCA", "BRCA" , "CESC", "CHOL", "COAD", "COADREAD", "DLBC", "ESCA", "GBM", "GBMLGG", "HNSC", "KICH", "KIPAN", "KIRC", "KIRP", "LGG", "LIHC", "LUAD", "LUSC", "MESO", "OV", "PAAD", "PCPG", "PRAD", "READ", "SARC", "SKCM", "STAD", "TGCT", "THCA", "THYM", "UCEC", "UCS")
r paste(selected_genes, collapse = ", ")
fileNameIn <- (paste0("data/", paste(selected_genes, collapse = "_"), "_coexpression_", data.type, "_", type, ".Rda"))
if (!file.exists(fileNameIn)) {
selected_genes_expr <- rbind() # data frame to rbind cancer-specific expression of selected genes
for (cancer in cancer_RNASeq2) {
all_exprs <- list() # List to store cancer-specific expression matrixes
# print(paste0("Processing cancer ", cancer))
# Prepare expression data
mtx <- load_data(disease = cancer, data.type = data.type, type = type, data_dir = data_dir, force_reload = FALSE)
expr <- mtx$merged.dat[ , 4:ncol(mtx$merged.dat)] %>% as.matrix
# Sanity check that genes are in the matrix
# colnames(expr)[(colnames(expr) %in% selected_genes)]
expr <- (expr + 1) %>% log2 %>% t
expr_selected <- expr[rownames(expr) %in% selected_genes, , drop = FALSE]
# The data in form of selected gene expression (columns), and a "cancer" label variable
selected_genes_expr <- rbind(selected_genes_expr, data.frame(t(expr_selected), cancer = rep(cancer, ncol(expr))))
}
save(selected_genes_expr, file = fileNameIn) # Select cancers
} else {
load(file = fileNameIn)
}
gdata <- reshape2::melt(selected_genes_expr)
# ggplot(gdata, aes(x = cancer, y = value, fill = variable)) + geom_boxplot()
# ggplot(gdata, aes(x = cancer, y = value, fill = variable)) + geom_bar(position=position_dodge(), stat = "summary", fun.y = "mean")
# http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/
gdata_summary <- summarySE(gdata, measurevar="value", groupvars=c("cancer", "variable"))
ggplot(gdata_summary, aes(x = cancer, y = value, fill = variable)) +
geom_bar(position=position_dodge(), stat="identity",
colour="black", # Use black outlines,
size=.3) + # Thinner lines
geom_errorbar(aes(ymin=value-se, ymax=value+se),
size=.3, # Thinner lines
width=.2,
position=position_dodge(.9)) +
xlab("Cancer type") +
ylab("log2 expression") +
scale_fill_hue(name="Gene", # Legend label, use darker colors
breaks=selected_genes,
labels=selected_genes) +
ggtitle("Expression of selected genes in all TCGA cancers") +
scale_y_continuous(breaks=0:20*4) +
theme_bw() +
theme(axis.text.x = element_text(angle = 90, hjust = 1))
THIS IS MY ENTIRE CODE. in my DIR that there is only 1 .rda file. DO you need any other data ? Error is Error in source("~/.active-rstudio-document") :
attempt to use zero-length variable name
I would recommend using this R package https://bioconductor.org/packages/release/data/experiment/html/curatedTCGAData.html.
https://github.com/mdozmorov/TCGAsurvival/blob/a2037ee35ffc17ff79de77e60de1d0fac34367c1/TCGA_expression.Rmd#L127