almeidasilvaf / BioNERO

Easy and comprehensive biological network reconstruction and analysis
https://almeidasilvaf.github.io/BioNERO/
25 stars 5 forks source link

Problem with assigning TF to targets #28

Closed Karenmagh closed 1 month ago

Karenmagh commented 1 month ago

Hello dear Fabricio,

I am using BioNERO for the first time to infer a GRN. My data come from a non-model organism: I assembled its RNA-seq samples with Trinity and predicted the TFs with iTAK. I am trying to build the network, but the TFs do not seem to be assigned to each group of target genes. I don't know what I am doing wrong. Could you help me?

This is the code I am using:

> # Load package after installation
> library(BioNERO)
> library(DESeq2)
> #library(SummarizedExperiment)
> 
> set.seed(123) # for reproducibility
> 
> #Set path
> setwd("C:/Users/enriq/OneDrive - CINVESTAV/Escritorio/Paper Karen 2024/Red_interacion")
> list.files()
[1] "Bionero_Karen.R"             "kallisto.gene.counts.matrix" "matriz.txt"                 
[4] "Sample_info.txt"             "tabla_interacion_2024.csv"   "tabla_targets_all.csv"      
[7] "TF_complete.txt"            
> 
> #Read matrix
> countData <- read.delim("./matriz.txt",header=T, row.names=1)
> head(countData)
                           V2     V3     V4     V5     V6     V7
TRINITY_DN0_c0_g1      548.78 639.07 522.50 617.26 605.77 716.11
TRINITY_DN0_c0_g2       76.24  98.18  99.05 113.47 102.99 110.44
TRINITY_DN0_c1_g1      101.09 138.78  93.05 101.30 131.10 174.53
TRINITY_DN100001_c0_g1   0.00   0.00  24.21   0.00   0.00   0.00
TRINITY_DN100002_c0_g1  32.10   0.00   0.00   0.00  49.69   0.00
TRINITY_DN100003_c0_g1   0.00   0.00   0.00   0.00   0.00   0.00
> 
> #Sample description
> colData <- read.delim("./Sample_info.txt")
> head(colData)
  FileName Condition
1       V2  Luz_P1H0
2       V3  Luz_P2H0
3       V4  Osc_P17D
4       V5  Osc_P27D
5       V6 Rec_P148H
6       V7 Rec_P248H
> 
> #Fix
> countData <- round(countData)
> 
> # Revisamos la estructura de los datos
> head(countData)
                        V2  V3  V4  V5  V6  V7
TRINITY_DN0_c0_g1      549 639 522 617 606 716
TRINITY_DN0_c0_g2       76  98  99 113 103 110
TRINITY_DN0_c1_g1      101 139  93 101 131 175
TRINITY_DN100001_c0_g1   0   0  24   0   0   0
TRINITY_DN100002_c0_g1  32   0   0   0  50   0
TRINITY_DN100003_c0_g1   0   0   0   0   0   0
> 
> #SummarizedExperiment
> matriz.se <-SummarizedExperiment(assays=list(counts=countData), colData=colData)
> 
> # Preprocess the expression data
> final_exp <- exp_preprocess(
+   matriz.se, 
+   min_exp = 10, 
+   variance_filter = TRUE, 
+   n = 2000
+ )
Number of removed samples: 0
> 
> 
> #Gene regulatory network inference
> #BioNERO requires only 2 objects for GRN inference: the expression data (SummarizedExperiment, 
> #matrix or data frame) and a character vector of regulators (transcription factors or miRNAs). 
> #The transcription factors used in this vignette were downloaded from PlantTFDB 4.0 (Jin et al. 2017).
> 
> #TF data
> TF <- read.delim("TF_complete.txt",header=T)
> head(TF, 10)
                    Gene    Family
1   TRINITY_DN2801_c0_g1  C2C2-Dof
2   TRINITY_DN2815_c0_g2       C3H
3  TRINITY_DN28191_c0_g1    E2F-DP
4  TRINITY_DN28238_c0_g2      C2H2
5   TRINITY_DN2823_c0_g1      bHLH
6  TRINITY_DN28244_c0_g1      WRKY
7    TRINITY_DN282_c0_g1      bHLH
8  TRINITY_DN28431_c0_g1 MADS-MIKC
9  TRINITY_DN28529_c0_g1      Tify
10 TRINITY_DN28635_c0_g1       HSF
> dim(TF)
[1] 1611    2
> 
> 
> #Consensus GRN inference
> # The vignette uses 10 trees for demonstration purposes; this run uses the default: 1000
> grn <- exp2grn(
+   exp = final_exp, 
+   regulators = TF$Gene, 
+   nTrees = 1000
+ )
The top number of edges that best fits the scale-free topology is 1748
> 
> ## (Vignette example output: "The top number of edges that best fits the scale-free topology is 247"; this run reported 1748, as printed above.)
> head(grn)
                Regulator                Target
4695 TRINITY_DN4130_c1_g1  TRINITY_DN1158_c0_g1
3099 TRINITY_DN2572_c0_g1  TRINITY_DN1292_c0_g1
4254 TRINITY_DN3615_c0_g1  TRINITY_DN1480_c0_g1
8577  TRINITY_DN978_c1_g1  TRINITY_DN1448_c0_g1
3098 TRINITY_DN2572_c0_g1 TRINITY_DN12816_c0_g1
1736  TRINITY_DN174_c0_g1   TRINITY_DN136_c3_g1
> 
> #Gene regulatory network analysis
> #After inferring the GRN, BioNERO allows users to perform some common downstream analyses.
> #Hub gene identification
> hubs <- get_hubs_grn(grn)
> head(hubs,10)
                    Gene Degree
1   TRINITY_DN3522_c0_g1    194
2  TRINITY_DN12707_c0_g1    192
3   TRINITY_DN2459_c0_g1    183
4  TRINITY_DN13895_c0_g1     60
5    TRINITY_DN930_c0_g1     54
6   TRINITY_DN1408_c0_g1     49
7   TRINITY_DN3903_c0_g2     48
8   TRINITY_DN7084_c0_g1     42
9    TRINITY_DN483_c0_g1     40
10  TRINITY_DN3920_c0_g2     39
> 
> #Network visualization
> plot_grn(grn)
> head(grn, 5)
                Regulator                Target
4695 TRINITY_DN4130_c1_g1  TRINITY_DN1158_c0_g1
3099 TRINITY_DN2572_c0_g1  TRINITY_DN1292_c0_g1
4254 TRINITY_DN3615_c0_g1  TRINITY_DN1480_c0_g1
8577  TRINITY_DN978_c1_g1  TRINITY_DN1448_c0_g1
3098 TRINITY_DN2572_c0_g1 TRINITY_DN12816_c0_g1
> #write.csv(grn, "Interation_table_2024.csv")
> 
> #Table regulator vs all targets
> library(dplyr)
> 
> # Agrupar los objetivos por regulador y concatenarlos en una sola cadena de caracteres
> tabla_targets <- grn %>%
+   group_by(Regulator) %>%
+   summarise(Targets = paste(Target, collapse = ", "))
> 
> # Imprimir la tabla
> print(tabla_targets)
# A tibble: 63 × 2
   Regulator             Targets                                                                               
   <chr>                 <chr>                                                                                 
 1 TRINITY_DN1025_c0_g1  TRINITY_DN11739_c0_g1, TRINITY_DN2403_c0_g2, TRINITY_DN4026_c0_g1, TRINITY_DN3028_c0_…
 2 TRINITY_DN1064_c0_g1  TRINITY_DN14594_c0_g1, TRINITY_DN16227_c0_g1, TRINITY_DN1448_c0_g1, TRINITY_DN15884_c…
 3 TRINITY_DN11113_c0_g1 TRINITY_DN4841_c1_g1, TRINITY_DN1343_c0_g1, TRINITY_DN1355_c0_g2, TRINITY_DN929_c0_g1…
 4 TRINITY_DN1178_c0_g1  TRINITY_DN3473_c0_g2, TRINITY_DN7092_c0_g1, TRINITY_DN1710_c0_g1, TRINITY_DN6895_c0_g…
 5 TRINITY_DN1200_c0_g1  TRINITY_DN5827_c0_g2, TRINITY_DN8217_c0_g1, TRINITY_DN7150_c0_g1, TRINITY_DN823_c0_g1…
 6 TRINITY_DN12707_c0_g1 TRINITY_DN1458_c0_g1, TRINITY_DN15743_c0_g1, TRINITY_DN1322_c3_g1, TRINITY_DN3057_c1_…
 7 TRINITY_DN1363_c0_g1  TRINITY_DN10852_c0_g1, TRINITY_DN16610_c0_g1, TRINITY_DN11059_c0_g1, TRINITY_DN10763_…
 8 TRINITY_DN13895_c0_g1 TRINITY_DN1181_c0_g1, TRINITY_DN12048_c0_g1, TRINITY_DN18111_c0_g1, TRINITY_DN1679_c1…
 9 TRINITY_DN1408_c0_g1  TRINITY_DN11139_c0_g1, TRINITY_DN1399_c0_g1, TRINITY_DN1358_c0_g1, TRINITY_DN1847_c0_…
10 TRINITY_DN1433_c0_g2  TRINITY_DN2278_c0_g1, TRINITY_DN7937_c0_g1, TRINITY_DN6625_c0_g1, TRINITY_DN1462_c0_g…
# ℹ 53 more rows
# ℹ Use `print(n = ...)` to see more rows
> 

This is the network:

WhatsApp Image 2024-10-20 at 8 33 36 PM

almeidasilvaf commented 1 month ago

Hi, @Karenmagh

Thank you for using BioNERO. :-)

I'm not sure I understand your problem. Would you like to have all TFs labelled in the plot instead of only the top N hubs? If so, you can change the value of the show_labels parameter (see docs here).

By default, plot_grn() labels only the top N hubs (with N specified in the argument top_n_hubs), but you can choose to label all hubs, or even all nodes. To label all hubs, for instance, you'd run:

plot_grn(grn, show_labels = "allhubs")

To label all nodes, you'd run:

plot_grn(grn, show_labels = "all")

Let me know if that answers your question.

Best, Fabricio

Karenmagh commented 1 month ago

Thank you so much for your support. You can close this issue.