randel / MIND

Using Bulk Gene Expression to Estimate Cell-Type-Specific Gene Expression via Deconvolution
https://randel.github.io/MIND/
43 stars 9 forks source link

Error in if (!is.symmetric.matrix(x)) stop("argument x is not a symmetric matrix") : missing value where TRUE/FALSE needed #9

Closed Ci-TJ closed 2 years ago

Ci-TJ commented 2 years ago

Hi! I got some troubles when I used bMIND. How could I solve this?

suppressPackageStartupMessages(library(MIND))
library(data.table)
library(tidytable)
sce <- fread("ref_tpm_ndLV_GSE121893.txt", sep = '\t', data.table= F) %>% data.frame()
rownames(sce) <- sce$GeneSymbol
ref <- as.matrix(apply(sce[-1,-1],2, as.numeric))
rownames(ref) <- sce$GeneSymbol[-1]

metadata <- fread("ref_meta_GSE121893.txt", sep = '\t', data.table= F) %>% select.(ID,cell_type) %>% data.frame(); rownames(metadata) <- metadata$ID; colnames(metadata)[1] <- "sample"
ref_meta <- metadata

#table(ref_meta$cell_type, ref_meta$sample)

be <- fread('wp40_tpm.txt', sep= '\t', data.table = F)%>% data.frame(); rownames(be) <- be$GeneSymbol
bulk <- as.matrix(be[,-1])
dim(bulk);dim(ref);dim(ref_meta)
#[1] 20643    42
#[1] 19948  1151
#[1] 1151   10
all(colnames(ref) == rownames(ref_meta))
#[1] TRUE

#Check for the overlapping genesymbol
length(intersect(rownames(sce), rownames(be)))
#[1] 15017

is.numeric(bulk);is.numeric(ref)
#[1] TRUE
#[1] TRUE

prior = get_prior(sc = ref, meta_sc = ref_meta)
Error in if (!is.symmetric.matrix(x)) stop("argument x is not a symmetric matrix") :
  missing value where TRUE/FALSE needed

> head(ref_meta)
                         sample cell_type
SC_100355_0_19   SC_100355_0_19        FB
SC_100355_0_44   SC_100355_0_44        FB
SC_100355_0_45   SC_100355_0_45        FB
SC_100355_10_35 SC_100355_10_35        CM
SC_100355_10_59 SC_100355_10_59        CM
SC_100355_10_6   SC_100355_10_6        CM
> head(ref)[,1:4]
         SC_100355_0_19 SC_100355_0_44 SC_100355_0_45 SC_100355_10_35
TSPAN6                0         0.0000         0.0000               0
DPM1                  0         0.0000         0.0000               0
SCYL3                 0         0.0000         0.0000               0
C1orf112              0         0.0000         0.0000               0
FGR                   0         0.0000         0.0000               0
CFH                   0       278.8155       204.3904               0

Best, Qin

randel commented 2 years ago

The sample means biological tissue samples before scRNA-seq, not the cell ID. The idea is to generate pseudo bulk data and then get the prior info.

You may set Individual as sample by colnames(metadata)[4] <- "sample"

Ci-TJ commented 2 years ago

Thanks for your timely response. I changed some codes, but still has troubles. By the way, I have sorted the GeneSymbols and just retained the GeneSymbols that were in bulk.

#Bulk
be <- fread('wp40_tpm.txt', sep= '\t', data.table = F)%>% arrange.(GeneSymbol) %>% data.frame(); rownames(be) <- be$GeneSymbol
bulk <- as.matrix(be[,-1])

#single cell RNA-seq
sce <- fread("ref_tpm_ndLV_GSE121893.txt", sep = '\t', data.table= F) %>% arrange.(GeneSymbol) %>% data.frame()
rownames(sce) <- sce$GeneSymbol
ref <- as.matrix(apply(sce[-1,-1],2, as.numeric))
rownames(ref) <- sce$GeneSymbol[-1]
ref <- ref[rownames(ref) %in% rownames(bulk),] #genesymbol of prior must in rownames(bulk) 

There were 50 or more warnings (use warnings() to see the first 50)
>
#Meta
metadata <- fread("ref_meta_GSE121893.txt", sep = '\t', data.table= F) %>% select.(sample,cell_type, ID) %>% data.frame(); rownames(metadata) <- metadata$ID; 
colnames(metadata)[1] <- "sample"
ref_meta <- metadata
> dim(bulk);dim(ref);dim(ref_meta)
[1] 20643    42
[1] 15017  1151
[1] 1151    2
#Check for the overlapping genesymbol
length(intersect(rownames(sce), rownames(be)))
[1] 15017

is.numeric(bulk);is.numeric(ref)
[1] TRUE
[1] TRUE
prior = get_prior(sc = ref, meta_sc = ref_meta)
> frac = est_frac(sig = prior$profile, bulk = log2(1+bulk))
  CM   EC   FB   MP  SMC
0.62 0.18 0.17 0.00 0.02
> deconv = bMIND2(bulk = log2(1+bulk[rownames(prior$profile)[1:20],]), frac = frac, profile = prior$profile, covariance = prior$covariance, noRE = F, ncore = 20)
[1] "20 errors"
List of 1
 $ :List of 2
  ..$ message: chr "V is the wrong dimension for some prior$G/prior$R elements"
  ..$ call   : language priorformat(if (NOpriorG) {     NULL ...
  ..- attr(*, "class")= chr [1:3] "simpleError" "error" "condition"
NULL
Error in `rownames<-`(`*tmp*`, value = names(res)) :
  attempt to set 'rownames' on an object with no dimensions
>

> head(ref_meta)
                sample cell_type              ID
SC_100355_0_19     N13        FB  SC_100355_0_19
SC_100355_0_44     N13        FB  SC_100355_0_44
SC_100355_0_45     N13        FB  SC_100355_0_45
SC_100355_10_35    N13        CM SC_100355_10_35
SC_100355_10_59    N13        CM SC_100355_10_59
SC_100355_10_6     N13        CM  SC_100355_10_6
> head(prior$profile)
             CM       EC       FB       MP      SMC
A2M    5.895741 8.880472 7.413108 7.294460 7.865494
A4GALT 4.047221 4.469456 5.251129 3.774289 4.655965
AAAS   2.566100 4.255511 2.882695 2.795215 3.281809
AAGAB  2.939477 2.652552 1.956149 1.593693 4.741377
AAK1   3.363754 4.368886 2.365620 4.141436 4.198291
AAMDC  7.165401 6.366776 6.639057 5.630975 6.565367
>
> head(bulk)[,1:5]
                 H01         H02         H03         H04         H05
A1BG       1.2911502   1.1013011   1.1730039   1.2949625   1.4256415
A1BG-AS1   0.8753839   0.8289749   0.8688837   0.9099683   0.9306645
A2M      201.6659053 194.0972743 200.7630030 206.1492072 209.0499928
A2M-AS1    1.9282218   1.9524631   1.9471302   1.9463397   1.9546481
A2MP1      0.2922756   0.2972607   0.3040702   0.3062221   0.2988039
A4GALT    20.1340374  19.4499368  20.1795049  21.0648165  21.4648625
>
randel commented 2 years ago

I have no access to your processed GSE data, so I can only guess the reason.

It may be caused by rare cell types. How about if you focus on the first three cell types with an average fraction > 5% for frac, profile, and covariance?

If it still does not work, a producible data/code example would be helpful.