Riemer1818 / MscProjectDataAnalysis

1 stars 0 forks source link

Recalculating STRING scores #2

Open gdagstn opened 1 month ago

gdagstn commented 1 month ago

This is applied to Differential Expression results, but you can still see how the scores are calculated

# Plot STRING networks using only DEGs and coloring by fold change
plotStringDE <- function(res, alpha = 0.05, lfc = 1, fields.keep = c("neighborhood", "experiments", "database"), 
                         min.score = 700) {
  res = as.data.frame(res)
  res$gene = rownames(res)

  sigs = res$gene[res$padj < alpha & abs(res$log2FoldChange) > lfc]
  sigs = sigs[!is.na(sigs)]

  network = stringdb[stringdb$gene1 %in% sigs & stringdb$gene2 %in% sigs,]

  updatescores = network[,c(fields.keep, paste0(fields.keep, "_transferred"))]/1000

## UPDATING SCORES 

  p = 0.041

  for(i in fields.keep) {
    updatescores[,i] = as.numeric(sapply(updatescores[,i], function(x) {
      if(x < p) x = p
      return((x - p)/(1 - p))
    }))
    updatescores[,paste0(i, "_transferred")] = as.numeric(sapply(updatescores[,paste0(i, "_transferred")], function(x) {
      if(x < p) x = p
      return((x - p)/(1 - p))
    }))
    updatescores[,paste0(i, "_corrected")] = 1 - (1 - updatescores[,i]) * (1 - updatescores[,paste0(i, "_transferred")])
  }

  updatescores_corr = updatescores[,grep("corrected", colnames(updatescores))]

  updatescores_corr$tot_om = (1 - updatescores_corr$neighborhood_corrected) * (1 - updatescores_corr$experiments_corrected) * (1 - updatescores_corr$database_corrected)

  cb = 1 - updatescores_corr$tot_om 
  cb = cb * (1 - p)
  cb = cb + p
  network$newcomb = round(cb * 1000)

  gn = data.frame("from" = network$gene1, "to" = network$gene2)
  gn = gn[network$newcomb >= min.score, ]

  g = graph_from_data_frame(gn, directed = FALSE)
  g = simplify(g)

  res = res[!duplicated(res$gene),]
  rownames(res) = res$gene

  V(g)$log2FoldChange = res[names(V(g)), "log2FoldChange"]

  l <- layout_with_graphopt(g)

  ggraph(g, layout = "manual", x = l[,1], y = l[,2]) + 
    geom_edge_link(width = 0.2) + 
    geom_node_point(aes(fill = log2FoldChange), size = 6, shape = 21) + 
    geom_node_label(mapping = aes(label = names(V(g))), repel = TRUE, size = 2.5, 
                    label.padding = 0.15,) + 
    theme_bw() +
    theme(axis.title = element_blank(), 
          axis.ticks = element_blank(), 
          axis.line = element_blank(), 
          axis.text = element_blank(),
          panel.grid = element_blank()) +
    scale_fill_gradient2(name = "log2(FC)", 
                         low = scales::muted("blue"), 
                         high = scales::muted("red")) 
}
gdagstn commented 1 month ago

Reference to how this was calculated:

https://string-db.org/help/faq/