broadinstitute / infercnv

Inferring CNV from Single-Cell RNA-Seq
Other
566 stars 166 forks source link

gene position file not working #638

Open mbihie opened 9 months ago

mbihie commented 9 months ago

I followed the instructions on generating a gene position file and infercnv will not run. I added the chromosome number, start and end as well. I also made sure that the number of rows of the matrix (gene names) matched the number of rows of the gene position file and that the number of columns of the matrix and the number of rows of the annotation file matched as well. Please let me know if you can see where I went wrong.

Here is the data for the annotation file and matrix: https://drive.google.com/drive/folders/1VmPan5V19Hq--fMnFHOta67TIpO60srE

I also used gencode_v21_gen_pos.complete.txt for the position file.

this is the error I got:

Error in names(gene_order) <- c(C_CHR, C_START, C_STOP) : 
  'names' attribute [3] must be the same length as the vector [0]

This is the code to generate the files for infercnv input

#install.packages("dplyr")
library(dplyr)
#install.packages("tidyverse")
library(tidyverse)
#install.packages("vroom")
library(vroom)
#install.packages("SCEVAN")
library(SCEVAN)
#install.packages("vcfR")
library(vcfR)

# Reading the metadata with the dataset:
df_id <- read.csv("~/CNV/data/S01_metacells.csv")

# Finding all of the cells that are correlated with TH179 and TH238, setting it to df_id_f:
df_id_f <- filter(df_id, patient_id %in% c("TH179", "TH238", "TH179_NAT", "TH238_NAT"))

# Keeping likely necessary columns:
df_id_f <- df_id_f %>%
  select(X, well, plate, cell_id, sample_name, patient_id)

#output
head(df_id_f)

# load dataframe with raw counts 
df <- vroom("~/CNV/data/S01_datafinal.csv")

#output
head(df)

#extract cell ids of partients of interest
cells <- df_id_f$cell_id

#filter df of gene counts to cell ids of partients of interest
df_f <- df %>% select(all_of(cells))

#filter df to patient
df_id_179 <- df_id_f %>% filter(patient_id == "TH179" | patient_id == "TH179_NAT")
df_id_238 <- df_id_f %>% filter(patient_id == "TH238" | patient_id == "TH238_NAT")

#extract cell ids of partients of interest
cells179 <- df_id_179$cell_id
cells238 <- df_id_238$cell_id

#filter df of gene counts to cell ids of partients of interest
df_f_179 <- df %>% select(all_of(cells179))
df_f_238 <- df %>% select(all_of(cells238))

#rownames (genes)
gene_list = c(df[["...1"]])

#add gene names to row names
rownames(df_f_179) <- gene_list
rownames(df_f_238) <- gene_list

#extract normal cells
norm179df <- df_id_f %>% filter(patient_id == "TH179_NAT")
norm238df <- df_id_f %>% filter(patient_id == "TH238_NAT")

#extract cell ids
norm179 <- norm179df[["cell_id"]]
norm238 <- norm238df[["cell_id"]]

#gn order file
  #pull data
gn179 <- read.table(file = "~/CNV/data/gencode_v21_gen_pos.complete.txt", 
                     header = FALSE, 
                     sep = "", 
                     dec = ".")

  #format table
gn179 <- gn179 %>%
  mutate(V1 = sub("\\|.*","\\",V1)) %>%
  filter(V1 %in% (gene_list)) %>%
  distinct(V1, .keep_all = TRUE) %>%
  arrange((V1))

  #write table
write.table(x = gn179,
            file = "~/CNV/data/cnv-gn -position-file-179.txt",
            col.names = FALSE,
            row.names = FALSE,
            quote = FALSE)

#raw_counts_matrix
  #edit dataframe to match the gene order file
df_f_179 <- df_f_179 %>%
  rownames_to_column(var = "gnrow") %>%
  filter(gnrow %in% gn179$V1) %>%
  column_to_rownames(var = "gnrow")

mtx_179 <- as.matrix(df_f_179)
write.table(mtx_179,"~/CNV/data/cnv-matrix-179.mtx")

#annotations_file (normal vs tumor)
  #format table
ann179 <- df_id_179 %>% 
  select(cell_id, patient_id) %>%
  mutate(patient_id =sub("TH179_NAT", 
                         "normal (TH179)", 
                         patient_id))

  #write table
write.table(x = ann179,
            file = "~/CNV/data/cnv-annotation-file-179.txt",
            col.names = FALSE,
            row.names = FALSE,
            quote = FALSE)

This was the infercnv object I tried to create.

library("infercnv")
infercnv_obj = CreateInfercnvObject(raw_counts_matrix="~/CNV/data/cnv-matrix-179.mtx",
                                    annotations_file="~/CNV/data/cnv-annotation-file-179.txt",
                                    delim="\t",
                                    gene_order_file="~/CNV/data/cnv-gn -position-file-179.txt",
                                    ref_group_names=c("TH179","normal (TH179)"))

cnv-annotation-file-179.txt cnv-gn -position-file-179.txt