califano-lab / single-cell-pipeline

40 stars 8 forks source link

gene-convert-dict.rds #11

Open dariader opened 1 year ago

dariader commented 1 year ago

Hello, Can you please help me to find the dictionary that is used in this function? gene-convert-dict.rds

Thanks!

GTExVIPER <- function(dat.mat, gtex.path, num.nets = 3) {
  ## convert to entrez
  convert.dict <- readRDS(paste(gtex.path, 'gene-convert-dict.rds', sep = ''))
  dat.mat <- dat.mat[ which(rownames(dat.mat) %in% convert.dict$Ensembl.Gene.ID) ,] # remove rows with no ENSG match
  rname.match <- match(rownames(dat.mat), convert.dict$Ensembl.Gene.ID) # match remaining ENSG names
  entrez.names <- convert.dict$Entrez.Gene.ID[rname.match] # get Entrez names
  na.inds <- which(is.na(entrez.names)) # remove NA entrez names from matrix
  dat.mat <- dat.mat[ -na.inds ,]; entrez.names <- entrez.names[ -na.inds]
  rownames(dat.mat) <- entrez.names
  print(dim(dat.mat))
  ## load in all gtex networks
  net.files <- dir(gtex.path, pattern = '*.rda')
  gtex.nets <- list()
  for (i in 1:length(net.files)) {
    gtex.nets[[i]] <- get(load( paste(gtex.path, net.files[i], sep = '') ))
  }
  ## compute VIPER for all the networks
  viper.mats <- list()
  for (i in 1:length(gtex.nets)) {
    viper.mats[[i]] <- viper(dat.mat, gtex.nets[[i]], method = 'none')
  }
  ## identify the most important networks (as defined by num.nets)
  net.counts <- rep(0, length(gtex.nets)); names(net.counts) <- 1:length(gtex.nets)
  shared.regs <- Reduce(intersect, lapply(viper.mats, rownames))
  for (i in 1:length(shared.regs)) {
    reg <- shared.regs[i]
    reg.mat <- as.data.frame(lapply(viper.mats, function(x) { x[reg,] }))
    max.inds <- apply(reg.mat, 1, function(x) { which.max(abs(x)) } )
    for (j in 1:length(max.inds)) {
      net.counts[ max.inds[j] ] <- net.counts[ max.inds[j] ] + 1
    }
  }
  net.counts <- sort(net.counts, decreasing = TRUE)
  top.nets <- as.numeric(names(net.counts)[1:num.nets])
  ## clean up for memory purposes
  rm(viper.mats)
  ## integrate the results of the three selected networks
  mVip.mat <- viper(dat.mat, regulon = gtex.nets[top.nets], method = 'none')
  ## convert to ensemble
  mVip.mat <- mVip.mat[ which(rownames(mVip.mat) %in% convert.dict$Entrez.Gene.ID) ,]
  rname.match <- match(rownames(mVip.mat), convert.dict$Entrez.Gene.ID)
  ensg.names <- convert.dict$Ensembl.Gene.ID[rname.match]
  na.inds <- which(ensg.names == '')
  mVip.mat <- mVip.mat[ -na.inds ,]; ensg.names <- ensg.names[ -na.inds ]
  rownames(mVip.mat) <- ensg.names
  ## return
  return(mVip.mat)
}
filmchen commented 7 months ago

Do you have found the rds file? I have the same question.Thank you!