Open AAleotti opened 3 years ago
I also have the same problem. Please let me know if anyone know how to fix it.
If you don't want to save to disk the h5 content I wrote a custom function that accepts a UMIs matrix and gene list from memory.
It is a modified version of mcell_import_scmat_tsv
.
mcell_import_scmat_memory <- function(mat_nm, umis, genes, meta_fn = NULL, dset_nm=NULL,
force = FALSE, paralogs_policy = "sum")
{
# genes already ordered by the umis rows
if (!scdb_is_valid()) {
stop("MCERR - scdb is not initialized, cannot import")
}
if (!force & !is.null(scdb_mat(mat_nm))) {
return(TRUE)
}
if (any(is.na(umis)) | is.null(umis)) {
stop("MCERR - umi matrix empty")
}
if (!is.null(genes)) {
# Remove rows without gene name
umis = umis[!is.na(genes), ]
# remove NA gene names
genes = genes[!is.na(genes)]
unique_genes = names(which(table(genes) == 1))
# no need to remove first column, clean matrix
umis1 = umis[genes %in% unique_genes, ]
rownames(umis1) = genes[genes %in% unique_genes]
if (paralogs_policy == "remove") {
umis = umis1
}
else if (paralogs_policy == "sum") {
non_unique_genes = setdiff(genes, unique_genes)
umis2 = as.matrix(umis[genes %in% non_unique_genes, ])
message(sprintf("summing up total of %d paralog genes into %d unique genes",
nrow(umis2), length(non_unique_genes)))
dup_genes = genes[genes %in% non_unique_genes]
umis2s = apply(umis2, 2, function(x) {
tapply(x, INDEX = dup_genes, FUN = sum)
})
umis = rbind(umis1, umis2s)
rownames(umis) = c(rownames(umis1), rownames(umis2s))
}
}
else {
stop("MCERR - genes list empty")
}
if (!is.null(meta_fn)) {
# Not sure how this is relevant to the PerturbSeq, not touching
md = fread(meta_fn, sep = "\t")
mandatory = c("Cell.ID", "Amp.Batch.ID", "Seq.Batch.ID",
"Batch.Set.ID")
miss_f = setdiff(mandatory, colnames(md))
if (length(miss_f) > 0) {
stop("MC-ERR: missing fields in matrix metadata : ",
miss_f)
}
rownames(md) = md$Cell.ID
md = md %>% select(-Cell.ID)
}
else {
if (is.null(dset_nm)) {
stop("either provide a meta data file, or supply a dataset name (parameter dset_nm) and we'll fake it for you...")
}
dsets = data.frame(batch_set_id = dset_nm, amp_batch_id = dset_nm,
seq_batch_id = dset_nm)
md = matrix(c("import", dset_nm, dset_nm, dset_nm), nrow = ncol(umis),
ncol = 4, byrow = T, dimnames = list(colnames(umis),
c("type", "batch_set_id", "amp_batch_id", "seq_batch_id")))
md = as.data.frame(md)
}
sparse_m = Matrix::Matrix(as.matrix(umis))
rownames(sparse_m) = rownames(umis)
scmat = scm_new_matrix(sparse_m, md)
scdb_add_mat(mat_nm, scmat)
return(TRUE)
}
Hello, is there a way to import a dataset that is saved in .h5ad or in .loom format into metacell? Thanks! Alessandra