Mouse data. "subscript out of bounds" error.

Getting an error when running numbat on mouse data. The same script works with different mouse samples, but not with new set of samples. Have used reference generated from the sample itself, with no success. It is for some reason filtering most of the genes (there are ~20,000 in the original seurat object, only 75 are left by numbat). Full code and error below. Is this an issue with our data? Thanks for your help!!!

Approximating initial clusters using smoothed expression .. Mem used: 2.46Gb number of genes left: 75 Error in h(simpleError(msg, call)) : error in evaluating the argument 'x' in selecting a method for function 't': subscript out of bounds

library(dplyr)
library(data.table)
library(numbat)
library(stringr)
library(glue)
library(Seurat)
library(Matrix)

pat = '20'
pu_dir = paste0(pat,'/pileup')

vcf_pu = fread(glue('{pu_dir}/cellSNP.base.vcf'), skip = '#CHROM') %>% 
  rename(CHROM = `#CHROM`) %>%
  mutate(snp_id = paste(CHROM, POS, REF, ALT, sep = '_')) %>%
  mutate(CHROM = str_remove(CHROM, 'chr')) %>%
  mutate(CHROM = factor(CHROM, unique(CHROM))) %>%
  filter(CHROM != 'X')

vcf_phased = vcf_pu %>% mutate(GT = '1|0', cM = 0)

# pileup count matrices
AD = readMM(glue('{pu_dir}/cellSNP.tag.AD.mtx'))
DP = readMM(glue('{pu_dir}/cellSNP.tag.DP.mtx'))
barcodes = fread(glue('{pu_dir}/cellSNP.samples.tsv'), header = F)$V1

# convert to dataframe
DP = as.data.frame(Matrix::summary(DP)) %>%
  mutate(
    cell = barcodes[j],
    snp_id = vcf_pu$snp_id[i]
  ) %>%
  select(-i, -j) %>%
  rename(DP = x) %>%
  select(cell, snp_id, DP)

AD = as.data.frame(Matrix::summary(AD)) %>%
  mutate(
    cell = barcodes[j],
    snp_id = vcf_pu$snp_id[i]
  ) %>%
  select(-i, -j) %>%
  rename(AD = x) %>%
  select(cell, snp_id, AD)

df_allele = DP %>% left_join(AD, by = c("cell", "snp_id")) %>%
  mutate(AD = ifelse(is.na(AD), 0, AD))

# attach genotype info
df_allele = df_allele %>% inner_join(
  vcf_phased %>% select(snp_id, CHROM, POS, REF, ALT, GT, cM),
  by = 'snp_id')

## sample matrix

seu <- readRDS(paste0(pat,'/seu_processed.rds'))
count_matrix <- seu@assays$RNA@counts
output_file <- "count_matrixseu.mtx.gz"
writeMM(count_matrix, file = output_file)
count_mat = readMM('count_matrixseu.mtx.gz')
colnames(count_mat) = colnames(seu)
rownames(count_mat) = rownames(seu@assays$RNA@counts)
count_mat = as.matrix(count_mat)
count_mat = rowsum(count_mat, rownames(count_mat))
count_mat = as(count_mat, "dgCMatrix")
count_mat_tumor = count_mat

## expression reference

#refseu <- subset(seu, idents = 'Naive B Cells')
count_matrix <- refseu@assays$RNA@counts
output_file <- "count_matrix.mtx.gz"
writeMM(count_matrix, file = output_file)
count_mat = readMM('count_matrix.mtx.gz')
colnames(count_mat) = colnames(refseu)
rownames(count_mat) = rownames(refseu@assays$RNA@counts)
count_mat = as.matrix(count_mat)
count_mat = rowsum(count_mat, rownames(count_mat))
count_mat = as(count_mat, "dgCMatrix")
count_mat_ref = count_mat

p2 = pagoda2::basicP2proc(count_mat_ref, n.cores = 30)

clusters = p2$clusters$PCA$multilevel

ref_annot = data.frame(
  cell = names(clusters),
  group = unname(clusters)
)

ref_pancreas = numbat::aggregate_counts(
  count_mat_ref,
  ref_annot %>% group_by(group) %>% filter(n() > 100)
)

## numbat

run_numbat(
  count_mat_tumor, 
  ref_pancreas, 
  df_allele,
  t = 1e-5,
  ncores = 20,
  skip_nj = TRUE,
  min_LLR = 30,
  out_dir = './results',
  # mouse specific settings
  genome = "mm10",
  nu = 0
)

kharchenkolab / numbat

Mouse data. "subscript out of bounds" error. #157