MarioniLab / scran

Clone of the Bioconductor repository for the scran package.
https://bioconductor.org/packages/devel/bioc/html/scran.html
39 stars 23 forks source link

`pseudoBulkDGE()` intermediates dropped if `row.data` supplied #90

Closed PeteHaitch closed 3 years ago

PeteHaitch commented 3 years ago
# Example adapted from ?pseudoBulkDGE
suppressPackageStartupMessages(library(scran))

set.seed(10000)
suppressPackageStartupMessages(library(scuttle))
sce <- mockSCE(ncells=1000)
sce$samples <- gl(8, 125) # Pretending we have 8 samples.

# Making up some clusters.
sce <- logNormCounts(sce)
clusters <- kmeans(t(logcounts(sce)), centers=3)$cluster

# Creating a set of pseudo-bulk profiles:
info <- DataFrame(sample=sce$samples, cluster=clusters)
pseudo <- sumCountsAcrossCells(sce, info)

# Making up an experimental design for our 8 samples.
pseudo$DRUG <- gl(2,4)[pseudo$sample]

# Making up some row metadata
rowData(pseudo)$X <- sample(letters, nrow(pseudo), replace = TRUE)

# DGE analysis:
out <- pseudoBulkDGE(pseudo,
                     label=pseudo$cluster,
                     condition=pseudo$DRUG,
                     design=~DRUG,
                     coef="DRUG2")
# Metadata exists
metadata(out[[1]])
#> $design
#>   (Intercept) DRUG2
#> 1           1     0
#> 2           1     0
#> 3           1     0
#> 4           1     0
#> 5           1     1
#> 6           1     1
#> 7           1     1
#> 8           1     1
#> attr(,"assign")
#> [1] 0 1
#> attr(,"contrasts")
#> attr(,"contrasts")$DRUG
#> [1] "contr.treatment"
#> 
#> 
#> $y
#> An object of class "DGEList"
#> $counts
#>           Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 Sample7 Sample8
#> Gene_0001    6210    7095    5040    7163    8571    3986    3646    8961
#> Gene_0002     165      42     156     169     163     122      82     108
#> Gene_0003    5601    6496    4551    6992    5840    5792    3478    5091
#> Gene_0004     137     471    1004     413     316     215      62     240
#> Gene_0005   31416   35441   24130   27154   25395   20884   18890   30355
#> 1995 more rows ...
#> 
#> $samples
#>         group lib.size norm.factors sample cluster ncells DRUG
#> Sample1     1 16187024    1.0030809      1       1     43    1
#> Sample2     1 18577102    1.0019748      2       1     49    1
#> Sample3     1 14799216    0.9996731      3       1     39    1
#> Sample4     1 16923888    1.0040923      4       1     45    1
#> Sample5     1 15223503    0.9957293      5       1     40    2
#> Sample6     1 13599598    1.0073307      6       1     36    2
#> Sample7     1 11061716    0.9827435      7       1     29    2
#> Sample8     1 18572962    1.0055925      8       1     49    2
#> 
#> $design
#>   (Intercept) DRUG2
#> 1           1     0
#> 2           1     0
#> 3           1     0
#> 4           1     0
#> 5           1     1
#> 6           1     1
#> 7           1     1
#> 8           1     1
#> attr(,"assign")
#> [1] 0 1
#> attr(,"contrasts")
#> attr(,"contrasts")$DRUG
#> [1] "contr.treatment"
#> 
#> 
#> $common.dispersion
#> [1] 0.1157177
#> 
#> $trended.dispersion
#> [1] 0.03165464 0.34666546 0.03428029 0.28014539 0.01785835
#> 1995 more elements ...
#> 
#> $tagwise.dispersion
#> [1] 0.03260097 0.34073790 0.03385267 0.28807549 0.01728975
#> 1995 more elements ...
#> 
#> $AveLogCPM
#> [1]  8.643177  3.062714  8.458724  4.508804 10.734665
#> 1995 more elements ...
#> 
#> $trend.method
#> [1] "locfit"
#> 
#> $prior.df
#> [1] 111.6663
#> 
#> $prior.n
#> [1] 18.61104
#> 
#> $span
#> [1] 0.3685854
#> 
#> 
#> $fit
#> An object of class "DGEGLM"
#> $coefficients
#>           (Intercept)        DRUG2
#> Gene_0001   -7.871475  0.091381951
#> Gene_0002  -11.707698  0.002716088
#> Gene_0003   -7.948274 -0.008906604
#> Gene_0004  -10.366248 -0.825084677
#> Gene_0005   -6.338485 -0.074166281
#> 1995 more rows ...
#> 
#> $fitted.values
#>              Sample1    Sample2    Sample3    Sample4    Sample5    Sample6
#> Gene_0001  6193.7794  7100.4773  5643.5123  6482.2616  6335.7220  5725.8287
#> Gene_0002   133.5032   153.0465   121.6425   139.7213   124.9757   112.9452
#> Gene_0003  5735.8999  6575.5695  5226.3117  6003.0558  5307.4546  4796.5451
#> Gene_0004   510.9615   585.7603   465.5667   534.7601   208.9633   188.8480
#> Gene_0005 28690.1055 32890.0058 26141.2220 30026.3790 24869.9486 22475.9020
#>               Sample7    Sample8
#> Gene_0001  4543.62960  7806.2675
#> Gene_0002    89.62565   153.9830
#> Gene_0003  3806.21310  6539.3354
#> Gene_0004   149.85694   257.4645
#> Gene_0005 17835.35252 30642.3599
#> 1995 more rows ...
#> 
#> $deviance
#> Gene_0001 Gene_0002 Gene_0003 Gene_0004 Gene_0005 
#>  9.599702  4.182646  4.540738 10.070194  2.201386 
#> 1995 more elements ...
#> 
#> $method
#> [1] "oneway"
#> 
#> $counts
#>           Sample1 Sample2 Sample3 Sample4 Sample5 Sample6 Sample7 Sample8
#> Gene_0001    6210    7095    5040    7163    8571    3986    3646    8961
#> Gene_0002     165      42     156     169     163     122      82     108
#> Gene_0003    5601    6496    4551    6992    5840    5792    3478    5091
#> Gene_0004     137     471    1004     413     316     215      62     240
#> Gene_0005   31416   35441   24130   27154   25395   20884   18890   30355
#> 1995 more rows ...
#> 
#> $unshrunk.coefficients
#>           (Intercept)        DRUG2
#> Gene_0001   -7.871496  0.091383785
#> Gene_0002  -11.708671  0.002719113
#> Gene_0003   -7.948297 -0.008906808
#> Gene_0004  -10.366502 -0.825409996
#> Gene_0005   -6.338489 -0.074166630
#> 1995 more rows ...
#> 
#> $df.residual
#> [1] 6 6 6 6 6
#> 1995 more elements ...
#> 
#> $design
#>   (Intercept) DRUG2
#> 1           1     0
#> 2           1     0
#> 3           1     0
#> 4           1     0
#> 5           1     1
#> 6           1     1
#> 7           1     1
#> 8           1     1
#> attr(,"assign")
#> [1] 0 1
#> attr(,"contrasts")
#> attr(,"contrasts")$DRUG
#> [1] "contr.treatment"
#> 
#> 
#> $offset
#>         [,1]     [,2]     [,3]     [,4]     [,5]     [,6]     [,7]     [,8]
#> [1,] 16.6028 16.73941 16.50976 16.64832 16.53407 16.43285 16.20159 16.74279
#> attr(,"class")
#> [1] "CompressedMatrix"
#> attr(,"Dims")
#> [1] 5 8
#> attr(,"repeat.row")
#> [1] TRUE
#> attr(,"repeat.col")
#> [1] FALSE
#> 1995 more rows ...
#> 
#> $dispersion
#> [1] 0.03165464 0.34666546 0.03428029 0.28014539 0.01785835
#> 1995 more elements ...
#> 
#> $prior.count
#> [1] 0.125
#> 
#> $AveLogCPM
#> [1]  8.643177  3.062714  8.458724  4.508804 10.734665
#> 1995 more elements ...
#> 
#> $df.residual.zeros
#> [1] 6 6 6 6 6
#> 1995 more elements ...
#> 
#> $df.prior
#> Gene_0001 Gene_0002 Gene_0003 Gene_0004 Gene_0005 
#>  141.2887  141.2887  141.2887  141.2887  141.2887 
#> 1995 more elements ...
#> 
#> $var.post
#> Gene_0001 Gene_0002 Gene_0003 Gene_0004 Gene_0005 
#> 0.9795747 1.5940549 0.9491875 1.1585541 0.8556106 
#> 1995 more elements ...
#> 
#> $var.prior
#> [1] 0.9532297 1.6321449 0.9573579 1.1364796 0.8763644
#> 1995 more elements ...
#> 
#> $samples
#>         group lib.size norm.factors sample cluster ncells DRUG
#> Sample1     1 16187024    1.0030809      1       1     43    1
#> Sample2     1 18577102    1.0019748      2       1     49    1
#> Sample3     1 14799216    0.9996731      3       1     39    1
#> Sample4     1 16923888    1.0040923      4       1     45    1
#> Sample5     1 15223503    0.9957293      5       1     40    2
#> Sample6     1 13599598    1.0073307      6       1     36    2
#> Sample7     1 11061716    0.9827435      7       1     29    2
#> Sample8     1 18572962    1.0055925      8       1     49    2

# Repeat but supplying row.data
out2 <- pseudoBulkDGE(pseudo,
                      label=pseudo$cluster,
                      condition=pseudo$DRUG,
                      design=~DRUG,
                      coef="DRUG2",
                      row.data = rowData(pseudo))
# Metadata droppped
metadata(out2[[1]])
#> list()

Created on 2021-04-08 by the reprex package (v2.0.0)

Session info ``` r sessioninfo::session_info() #> ─ Session info ─────────────────────────────────────────────────────────────── #> setting value #> version R version 4.0.0 (2020-04-24) #> os Ubuntu 18.04.5 LTS #> system x86_64, linux-gnu #> ui X11 #> language en_AU:en #> collate en_AU.UTF-8 #> ctype en_AU.UTF-8 #> tz Australia/Melbourne #> date 2021-04-08 #> #> ─ Packages ─────────────────────────────────────────────────────────────────── #> package * version date lib source #> beachmat 2.6.4 2020-12-20 [1] Bioconductor #> Biobase * 2.50.0 2020-10-27 [1] Bioconductor #> BiocGenerics * 0.36.0 2020-10-27 [1] Bioconductor #> BiocNeighbors 1.8.2 2020-12-07 [1] Bioconductor #> BiocParallel 1.24.1 2020-11-06 [1] Bioconductor #> BiocSingular 1.6.0 2020-10-27 [1] Bioconductor #> bitops 1.0-6 2013-08-17 [1] CRAN (R 4.0.0) #> bluster 1.0.0 2020-10-27 [1] Bioconductor #> cli 2.4.0 2021-04-05 [1] CRAN (R 4.0.0) #> DelayedArray 0.16.3 2021-03-24 [1] Bioconductor #> DelayedMatrixStats 1.12.3 2021-02-03 [1] Bioconductor #> digest 0.6.27 2020-10-24 [1] CRAN (R 4.0.0) #> dqrng 0.2.1 2019-05-17 [1] CRAN (R 4.0.0) #> edgeR 3.32.1 2021-01-14 [1] Bioconductor #> evaluate 0.14 2019-05-28 [1] CRAN (R 4.0.0) #> fs 1.5.0 2020-07-31 [1] CRAN (R 4.0.0) #> GenomeInfoDb * 1.26.5 2021-04-05 [1] Bioconductor #> GenomeInfoDbData 1.2.4 2020-10-28 [1] Bioconductor #> GenomicRanges * 1.42.0 2020-10-27 [1] Bioconductor #> glue 1.4.2 2020-08-27 [1] CRAN (R 4.0.0) #> highr 0.8 2019-03-20 [1] CRAN (R 4.0.0) #> htmltools 0.5.1.1 2021-01-22 [1] CRAN (R 4.0.0) #> igraph 1.2.6 2020-10-06 [1] CRAN (R 4.0.0) #> IRanges * 2.24.1 2020-12-12 [1] Bioconductor #> irlba 2.3.3 2019-02-05 [1] CRAN (R 4.0.0) #> knitr 1.31 2021-01-27 [1] CRAN (R 4.0.0) #> lattice 0.20-41 2020-04-02 [4] CRAN (R 4.0.0) #> limma 3.46.0 2020-10-27 [1] Bioconductor #> locfit 1.5-9.4 2020-03-25 [1] CRAN (R 4.0.0) #> magrittr 2.0.1 2020-11-17 [1] CRAN (R 4.0.0) #> Matrix 1.3-2 2021-01-06 [4] CRAN (R 4.0.3) #> MatrixGenerics * 1.2.1 2021-01-30 [1] Bioconductor #> matrixStats * 0.58.0 2021-01-29 [1] CRAN (R 4.0.0) #> pkgconfig 2.0.3 2019-09-22 [1] CRAN (R 4.0.0) #> Rcpp 1.0.6 2021-01-15 [1] CRAN (R 4.0.0) #> RCurl 1.98-1.3 2021-03-16 [1] CRAN (R 4.0.0) #> reprex 2.0.0 2021-04-02 [1] CRAN (R 4.0.0) #> rlang 0.4.10 2020-12-30 [1] CRAN (R 4.0.0) #> rmarkdown 2.7 2021-02-19 [1] CRAN (R 4.0.0) #> rstudioapi 0.13 2020-11-12 [1] CRAN (R 4.0.0) #> rsvd 1.0.3 2020-02-17 [1] CRAN (R 4.0.0) #> S4Vectors * 0.28.1 2020-12-09 [1] Bioconductor #> scran * 1.18.5 2021-02-04 [1] Bioconductor #> scuttle * 1.0.4 2020-12-17 [1] Bioconductor #> sessioninfo 1.1.1 2018-11-05 [1] CRAN (R 4.0.0) #> SingleCellExperiment * 1.12.0 2020-10-27 [1] Bioconductor #> sparseMatrixStats 1.2.1 2021-02-02 [1] Bioconductor #> statmod 1.4.35 2020-10-19 [1] CRAN (R 4.0.0) #> stringi 1.5.3 2020-09-09 [1] CRAN (R 4.0.0) #> stringr 1.4.0 2019-02-10 [1] CRAN (R 4.0.0) #> SummarizedExperiment * 1.20.0 2020-10-27 [1] Bioconductor #> withr 2.4.1 2021-01-26 [1] CRAN (R 4.0.0) #> xfun 0.22 2021-03-11 [1] CRAN (R 4.0.0) #> XVector 0.30.0 2020-10-27 [1] Bioconductor #> yaml 2.2.1 2020-02-01 [1] CRAN (R 4.0.0) #> zlibbioc 1.36.0 2020-10-27 [1] Bioconductor #> #> [1] /home/peter/R/x86_64-pc-linux-gnu-library/4.0 #> [2] /usr/local/lib/R/site-library #> [3] /usr/lib/R/site-library #> [4] /usr/lib/R/library ```
PeteHaitch commented 3 years ago

Thanks